mirror of https://github.com/XProger/OpenLara.git synced 2025-08-01 02:40:43 +02:00

#368 GBA optimization with FIQ registers

This commit is contained in:
XProger
2022-04-23 16:15:06 +03:00
parent 017446b9f7
commit e821f236a2
28 changed files with 670 additions and 686 deletions

View File

@@ -1,7 +1,7 @@
#include "common_asm.inc"
v .req r0
q .req r1
v .req r0 // arg
q .req r1 // arg
min .req q
max .req r2

View File

@@ -1,19 +1,20 @@
#include "common_asm.inc"
aabb .req r0
x .req r1
y .req r2
z .req r3
minX .req r4
maxX .req r5
minY .req r6
maxY .req r7
aabb .req r0 // arg
x .req r1 // arg
y .req r2 // arg
z .req r3 // arg
// FIQ regs
minX .req r8
maxX .req r9
minY .req r10
maxY .req r11
minZ .req r12
maxZ .req lr
maxZ .req r13
.global boxTranslate_asm
boxTranslate_asm:
stmfd sp!, {r4-r7, lr}
fiq_on
ldmia aabb, {minX, maxX, minY, maxY, minZ, maxZ}
add minX, minX, x
@@ -24,5 +25,5 @@ boxTranslate_asm:
add maxZ, maxZ, z
stmia aabb, {minX, maxX, minY, maxY, minZ, maxZ}
ldmfd sp!, {r4-r7, lr}
fiq_off
bx lr
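The hunk above shows the core pattern of this commit: instead of spilling callee-saved registers to the stack via stmfd/ldmfd, the function switches the CPU into FIQ mode, where r8-r14 are banked copies, so the caller's registers survive untouched and the prologue/epilogue cost no memory traffic. A minimal sketch of the before/after shape (hypothetical addVec3 routine, not part of the commit; fiq_on/fiq_off are the macros from common_asm.inc below):

// before: scratch space comes from the stack
addVec3_old:
    stmfd sp!, {r4-r6}        // spill r4-r6 to RAM
    ldmia r0, {r4, r5, r6}    // load vec3
    add   r4, r4, r1          // x += dx
    add   r5, r5, r2          // y += dy
    add   r6, r6, r3          // z += dz
    stmia r0, {r4, r5, r6}
    ldmfd sp!, {r4-r6}        // reload from RAM
    bx    lr

// after: scratch space comes from the FIQ register bank
addVec3_new:
    fiq_on                    // r8-r10 now banked, caller's intact
    ldmia r0, {r8, r9, r10}
    add   r8, r8, r1
    add   r9, r9, r2
    add   r10, r10, r3
    stmia r0, {r8, r9, r10}
    fiq_off                   // caller's r8-r14 reappear
    bx    lr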

View File

@@ -120,15 +120,17 @@
ble \skip
.endm
.macro scaleUV uv, u, v, f
asr \u, \uv, #16
mul \u, \f // u = f * int16(uv >> 16)
lsl \v, \uv, #16
asr \v, #16
mul \v, \f // v = f * int16(uv)
lsr \u, #16
lsl \u, #16
orr \uv, \u, \v, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
.macro scaleUV uv, tmp, f
asr \tmp, \uv, #16
mul \tmp, \f // u = f * int16(uv >> 16)
lsl \uv, \uv, #16
asr \uv, #16
mul \uv, \f // v = f * int16(uv)
lsr \tmp, #16
lsl \tmp, #16
orr \uv, \tmp, \uv, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
.endm
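The reworked scaleUV scales a pair of int16 UV coordinates packed into one word by a 16.16 fixed-point factor f, then repacks the high halves of the two products into a single word; it now destroys \uv in place and needs only one temporary instead of two output registers. A usage sketch with assumed values — scaling by f = 1.0 must return the packed word unchanged:

    ldr r0, =0x00400020   // u = 64 (high half), v = 32 (low half)
    mov r1, #0x10000      // f = 1.0 in 16.16 fixed point
    scaleUV r0, r2, r1    // r0 == 0x00400020 again (identity)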
.macro tex index, uv
@@ -140,3 +142,19 @@
.macro lit index
ldrb \index, [LMAP, \index]
.endm
.macro fiq_on
msr cpsr, #0x11 // switch r8-r14 to FIQ (IRQ enabled)
.endm
.macro fiq_off
msr cpsr, #0x1F // restore r8-r14
.endm
.macro fiq_on_ne
msrne cpsr, #0x11 // switch r8-r14 to FIQ (IRQ enabled)
.endm
.macro fiq_off_ne
msrne cpsr, #0x1F // restore r8-r14
.endm
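The magic numbers in these macros are ARM7TDMI CPSR mode fields: 0x11 selects FIQ mode, which banks in the private r8_fiq-r14_fiq set, and 0x1F selects System mode, which shares the User-bank registers the game normally runs in; both values leave the I and F bits clear, so interrupts stay enabled as the comments note. A sketch of why the switch is safe (privileged mode assumed):

// writes to the FIQ bank do not disturb the System/User bank
    mov r8, #1            // System-bank r8 = 1
    msr cpsr, #0x11       // enter FIQ mode
    mov r8, #2            // writes r8_fiq only
    msr cpsr, #0x1F       // back to System mode
    // r8 reads as 1 again here; r8_fiq still holds 2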

View File

@@ -1,19 +1,21 @@
#include "common_asm.inc"
polys .req r0
count .req r1
polys .req r0 // arg
count .req r1 // arg
vp .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
vg3 .req r6
flags .req r7
// FIQ regs
vp0 .req r8
vp1 .req r9
vp2 .req r10
vp3 .req r11
ot .req r12
face .req lr
face .req r13
vertices .req r14
vx0 .req vg0
vy0 .req vg1
@@ -29,21 +31,18 @@ vz3 .req vg3
depth .req vg0
tmp .req flags
vertices .req vg2
next .req vp0
SP_SIZE = 4
.global faceAddMeshQuads_asm
faceAddMeshQuads_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r7}
fiq_on
ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vertices, =gVertices
lsr vertices, #3
stmfd sp!, {vertices}
ldr face, =gFacesBase
ldr face, [face]
@@ -97,7 +96,6 @@ faceAddMeshQuads_asm:
lsr depth, #(2 + OT_SHIFT)
// faceAdd
ldr vertices, [sp]
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
@@ -116,5 +114,6 @@ faceAddMeshQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r7}
bx lr
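With seven extra banked registers available, vertices gets a register of its own (r14 in the FIQ bank), so the old stmfd sp!, {vertices} spill and the ldr vertices, [sp] reload inside the hot path disappear, and only r4-r7 still need saving. The lsr #3 / rsb pair turns vertex pointers into vertex indices; a sketch of the arithmetic, assuming 8-byte vertices as the shift implies:

// index = (ptr - base) / 8 == (ptr >> 3) - (base >> 3)
    ldr r2, =gVertices
    lsr r2, #3               // base >> 3, computed once
    rsb r0, r2, r0, lsr #3   // r0 = (ptr >> 3) - (base >> 3)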

View File

@@ -1,19 +1,20 @@
#include "common_asm.inc"
polys .req r0
count .req r1
polys .req r0 // arg
count .req r1 // arg
vp .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
vg3 .req r6
flags .req r7
vp0 .req r8
vp1 .req r9
vp2 .req r10
vertices .req r11
ot .req r12
face .req lr
// FIQ regs
flags .req r8
vp0 .req r9
vp1 .req r10
vp2 .req r11
vertices .req r12
ot .req r13
face .req r14
vx0 .req vg0
vy0 .req vg1
@@ -32,7 +33,8 @@ next .req vp0
.global faceAddMeshTriangles_asm
faceAddMeshTriangles_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r6}
fiq_on
ldr vp, =gVerticesBase
ldr vp, [vp]
@@ -102,4 +104,6 @@ faceAddMeshTriangles_asm:
ldr tmp, =gFacesBase
str face, [tmp]
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r6}
bx lr

View File

@@ -1,19 +1,21 @@
#include "common_asm.inc"
polys .req r0
count .req r1
polys .req r0 // arg
count .req r1 // arg
vp .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
vg3 .req r6
flags .req r7
// FIQ regs
vp0 .req r8
vp1 .req r9
vp2 .req r10
vp3 .req r11
ot .req r12
face .req lr
face .req r13
vertices .req r14
vx0 .req vg0
vy0 .req vg1
@@ -29,21 +31,20 @@ vz3 .req vg3
depth .req vg0
tmp .req flags
vertices .req vg2
next .req vp0
SP_SIZE = 4
.global faceAddRoomQuads_asm
faceAddRoomQuads_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r7}
fiq_on
ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vertices, =gVertices
lsr vertices, #3
stmfd sp!, {vertices}
ldr face, =gFacesBase
ldr face, [face]
@@ -107,7 +108,6 @@ faceAddRoomQuads_asm:
mov depth, vz0, lsr #OT_SHIFT
// faceAdd
ldr vertices, [sp]
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
@@ -126,5 +126,6 @@ faceAddRoomQuads_asm:
ldr tmp, =gFacesBase
str face, [tmp]
add sp, #SP_SIZE
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r7}
bx lr

View File

@@ -1,19 +1,20 @@
#include "common_asm.inc"
polys .req r0
count .req r1
polys .req r0 // arg
count .req r1 // arg
vp .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
vg3 .req r6
flags .req r7
vp0 .req r8
vp1 .req r9
vp2 .req r10
vertices .req r11
ot .req r12
face .req lr
// FIQ regs
flags .req r8
vp0 .req r9
vp1 .req r10
vp2 .req r11
vertices .req r12
ot .req r13
face .req r14
vx0 .req vg0
vy0 .req vg1
@@ -32,7 +33,8 @@ next .req vp0
.global faceAddRoomTriangles_asm
faceAddRoomTriangles_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r6}
fiq_on
ldr vp, =gVerticesBase
ldr vp, [vp]
@@ -110,4 +112,6 @@ faceAddRoomTriangles_asm:
ldr tmp, =gFacesBase
str face, [tmp]
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r6}
bx lr

View File

@@ -1,8 +1,8 @@
#include "common_asm.inc"
this .req r0
x .req r1
z .req r2
this .req r0 // arg
x .req r1 // arg
z .req r2 // arg
info .req r3
roomX .req r12
roomZ .req roomX

View File

@@ -1,15 +1,16 @@
#include "common_asm.inc"
n .req r0
pmul .req r1
pdiv .req r2
m0 .req r3
m1 .req r4
m2 .req r5
n0 .req r6
n1 .req r7
n2 .req r12
m .req lr
n .req r0 // arg
pmul .req r1 // arg
pdiv .req r2 // arg
// FIQ regs
m0 .req r8
m1 .req r9
m2 .req r10
n0 .req r11
n1 .req r12
n2 .req r13
m .req r14
tmp .req m0
.macro load
@@ -83,7 +84,7 @@ tmp .req m0
.global matrixLerp_asm
matrixLerp_asm:
stmfd sp!, {r4-r7, lr}
fiq_on
ldr m, =gMatrixPtr
ldr m, [m]
.check_2:
@@ -111,5 +112,5 @@ matrixLerp_asm:
mov pmul, tmp, asr #8
lerp _X_Y
.done:
ldmfd sp!, {r4-r7, lr}
fiq_off
bx lr

View File

@@ -5,25 +5,26 @@ e1 .req r1
e2 .req r2
e3 .req r3
m .req e0
src .req r12
dst .req lr
// FIQ regs
src .req r8
dst .req r9
e4 .req r10
e5 .req r11
.global matrixPush_asm
matrixPush_asm:
stmfd sp!, {lr}
fiq_on
ldr m, =gMatrixPtr
ldr src, [m]
add dst, src, #(12 * 4)
str dst, [m]
ldmia src!, {e0, e1, e2, e3}
stmia dst!, {e0, e1, e2, e3}
ldmia src!, {e0, e1, e2, e3, e4, e5}
stmia dst!, {e0, e1, e2, e3, e4, e5}
ldmia src!, {e0, e1, e2, e3}
stmia dst!, {e0, e1, e2, e3}
ldmia src!, {e0, e1, e2, e3, e4, e5}
stmia dst!, {e0, e1, e2, e3, e4, e5}
ldmia src!, {e0, e1, e2, e3}
stmia dst!, {e0, e1, e2, e3}
ldmfd sp!, {lr}
fiq_off
bx lr
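matrixPush copies the current 12-word (3x4) matrix to the next matrix-stack slot; the FIQ bank donates two extra scratch registers (e4/e5 on r10/r11), so the copy drops from three 4-register ldm/stm bursts to two 6-register bursts, saving two instruction fetches per push. A minimal sketch of the same 12-word copy using ordinary registers (r0 = dst, r1 = src assumed):

    ldmia r1!, {r2-r7}    // words 0-5
    stmia r0!, {r2-r7}
    ldmia r1!, {r2-r7}    // words 6-11
    stmia r0!, {r2-r7}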

View File

@@ -18,17 +18,18 @@
mov \x, \x, asr #FIXED_SHIFT
.endm
angle .req r0
e0 .req r1
e1 .req r2
s .req r3
c .req r12
v .req lr
angle .req r0 // arg
s .req r1
c .req r2
v .req r3
// FIQ regs
e0 .req r8
e1 .req r9
m .req angle
.global matrixRotateX_asm
matrixRotateX_asm:
stmfd sp!, {lr}
fiq_on
mov angle, angle, lsl #16
mov angle, angle, lsr #20
@@ -53,12 +54,12 @@ matrixRotateX_asm:
rotxy e1, e0, s, c, v
stmia m, {e0, e1}
ldmfd sp!, {lr}
fiq_off
bx lr
.global matrixRotateY_asm
matrixRotateY_asm:
stmfd sp!, {lr}
fiq_on
mov angle, angle, lsl #16
mov angle, angle, lsr #20
@@ -86,12 +87,12 @@ matrixRotateY_asm:
str e0, [m], #8
str e1, [m], #8
ldmfd sp!, {lr}
fiq_off
bx lr
.global matrixRotateZ_asm
matrixRotateZ_asm:
stmfd sp!, {lr}
fiq_on
mov angle, angle, lsl #16
mov angle, angle, lsr #20
@@ -115,23 +116,24 @@ matrixRotateZ_asm:
rotxy e1, e0, s, c, v
stmia m, {e0, e1}
ldmfd sp!, {lr}
fiq_off
bx lr
angleX .req r0
angleY .req r1
angleZ .req r2
angleX .req r0 // arg
angleY .req r1 // arg
angleZ .req r2 // arg
e00 .req r3
e01 .req r4
e02 .req r5
e10 .req r6
e11 .req r7
e12 .req r8
e20 .req r9
e21 .req r10
e22 .req r11
// FIQ regs
e11 .req r8
e12 .req r9
e20 .req r10
e21 .req r11
tmp .req r12
sinX .req lr
e22 .req r13
sinX .req r14
sinY .req sinX
sinZ .req sinX
cosX .req angleX
@@ -153,7 +155,8 @@ matrixRotateYXZ_asm:
orrs mask, mask, angleZ
bxeq lr
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r6}
fiq_on
ldr mm, =gMatrixPtr
ldr mm, [mm]
@@ -203,10 +206,11 @@ matrixRotateYXZ_asm:
add mm, #(4 * 4)
stmia mm, {e20, e21, e22}
ldmfd sp!, {r4-r11, lr}
fiq_off
ldmfd sp!, {r4-r6}
bx lr
q .req r0
q .req r0 // arg
n .req r1
mx .req r3
my .req q

View File

@@ -1,7 +1,7 @@
#include "common_asm.inc"
dst .req r0
src .req r1
dst .req r0 // arg
src .req r1 // arg
e0 .req r2
e1 .req r3

View File

@@ -1,17 +1,18 @@
#include "common_asm.inc"
x .req r0
y .req r1
z .req r2
e0 .req r3
e1 .req r4
e2 .req r5
v .req r12
m .req lr
x .req r0 // arg
y .req r1 // arg
z .req r2 // arg
m .req r3
// FIQ regs
e0 .req r8
e1 .req r9
e2 .req r10
v .req r11
.global matrixTranslateRel_asm
matrixTranslateRel_asm:
stmfd sp!, {r4-r5, lr}
fiq_on
ldr m, =gMatrixPtr
ldr m, [m]
@@ -37,12 +38,12 @@ matrixTranslateRel_asm:
mla v, e2, z, v
stmdb m, {v}
ldmfd sp!, {r4-r5, lr}
fiq_off
bx lr
.global matrixTranslateAbs_asm
matrixTranslateAbs_asm:
stmfd sp!, {r4-r5, lr}
fiq_on
ldr v, =gCameraViewPos
ldmia v, {e0, e1, e2}
@@ -74,12 +75,12 @@ matrixTranslateAbs_asm:
mla v, e2, z, v
stmia m!, {v}
ldmfd sp!, {r4-r5, lr}
fiq_off
bx lr
.global matrixTranslateSet_asm
matrixTranslateSet_asm:
stmfd sp!, {r4-r5, lr}
fiq_on
ldr m, =gMatrixPtr
ldr m, [m]
@@ -105,5 +106,5 @@ matrixTranslateSet_asm:
mla v, e2, z, v
stmia m!, {v}
ldmfd sp!, {r4-r5, lr}
fiq_off
bx lr

View File

@@ -1,7 +1,7 @@
#include "common_asm.inc"
flags .req r0
L .req r1
flags .req r0 // arg
L .req r1 // arg
R .req r2
y .req r3
type .req r12

View File

@@ -1,19 +1,20 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
color .req r2
pixel .req r0 // arg
L .req r1 // arg
color .req r2 // arg
index .req r3
Lh .req r4
Rh .req r5
Lx .req r6
Rx .req r7
Ldx .req r8
Rdx .req r9
N .req r10
tmp .req r11
pair .req r12
width .req lr
// FIQ regs
Rx .req r8
Ldx .req r9
Rdx .req r10
N .req r11
tmp .req r12
pair .req r13
width .req r14
R .req color
h .req N
@@ -26,7 +27,8 @@ ptr .req tmp
.global rasterizeF_asm
rasterizeF_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r6}
fiq_on
add LMAP, color, #LMAP_ADDR
ldrb tmp, [L, #VERTEX_G]
@@ -133,4 +135,6 @@ rasterizeF_asm:
b .loop
.exit:
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r6}
bx lr

View File

@@ -1,94 +1,85 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
LMAP .req r3
arg_pixel .req r0 // arg
arg_L .req r1 // arg
arg_R .req r2 // arg
TILE .req r4
tmp .req r5
N .req r6
Lh .req r7
Rh .req r8
N .req r0
tmp .req r1
Lx .req r2
Rx .req r3
Lt .req r4
Rt .req r5
t .req r6
dtdx .req r7
Lx .req r9
Rx .req r10
Lt .req r11
Rt .req r12
h .req lr
indexA .req r8
indexB .req r9
LMAP .req r10
TILE .req r11
pixel .req r12
width .req lr
// FIQ regs
Ldx .req r8
Rdx .req r9
Ldt .req r10
Rdt .req r11
LRh .req r12
L .req r13
R .req r14
Rh .req LRh
Lh .req t
h .req N
ptr .req tmp
Ldx .req h
Rdx .req h
Ldt .req h
Rdt .req h
indexA .req Lh
indexB .req Rh
Rxy .req tmp
Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
inv .req Lh
width .req N
t .req L
dtdx .req R
inv .req indexA
duv .req indexB
dtmp .req t
duv .req R
du .req L
dv .req R
Ldu .req N
Ldv .req h
Rdu .req N
Rdv .req h
Ltmp .req N
Rtmp .req N
Rti .req indexB
sLdx .req tmp
sLdt .req N
sRdx .req Lh
sRdt .req Rh
SP_LDX = 0
SP_LDT = 4
SP_RDX = 8
SP_RDT = 12
SP_L = 16
SP_R = 20
SP_LH = 24
SP_RH = 28
SP_SIZE = 32
.macro PUT_PIXELS
tex indexA, t
lit indexA
add t, dtdx, lsl #1
//orr indexA, indexA, lsl #8
strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
.endm
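The strb comment leans on a GBA hardware quirk: 8-bit stores to background VRAM are replicated into both bytes of the addressed halfword, so a single byte write fills a whole 16-bit lane — two mode-4 pixels carrying the same palette index. A sketch of the effect (mode 4 bitmap VRAM assumed):

    mov  r0, #0x06000000   // VRAM base
    mov  r1, #42           // palette index
    strb r1, [r0]          // pixels 0 and 1 both become 42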
.global rasterizeFT_asm
rasterizeFT_asm:
stmfd sp!, {r4-r11, lr}
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
mov pixel, arg_pixel
mov LMAP, #LMAP_ADDR
ldrb tmp, [L, #VERTEX_G]
add LMAP, tmp, lsl #8 // tmp = (L->v.g << 8)
ldrb t, [arg_L, #VERTEX_G]
add LMAP, t, lsl #8 // LMAP = (L->v.g << 8)
ldr TILE, =gTile
ldr TILE, [TILE]
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
fiq_on
mov L, arg_L
mov R, arg_R
mov LRh, #0 // Lh = 0
.loop:
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bgt .calc_left_end // if (Lh != 0) end with left
@@ -114,12 +105,10 @@ rasterizeFT_asm:
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
str Ldx, [sp, #SP_LDX] // store Ldx to stack
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ldu, Ldv, tmp
str Ldt, [sp, #SP_LDT] // store Ldt to stack
scaleUV Ldt, Ltmp, tmp
.calc_left_end:
cmp Rh, #0
@@ -146,12 +135,10 @@ rasterizeFT_asm:
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
str Rdx, [sp, #SP_RDX] // store Rdx to stack
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rdu, Rdv, tmp
str Rdt, [sp, #SP_RDT] // store Rdt to stack
scaleUV Rdt, Rtmp, tmp
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@@ -160,8 +147,9 @@ rasterizeFT_asm:
sub Lh, h // Lh -= h
sub Rh, h // Rh -= h
add tmp, sp, #SP_L
stmia tmp, {L, R, Lh, Rh}
orr LRh, Rh, Lh, lsl #16
fiq_off
.scanline_start:
asr tmp, Lx, #16 // x1 = (Lx >> 16)
@@ -173,7 +161,7 @@ rasterizeFT_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, du, dv, inv
scaleUV dtdx, dtmp, inv
mov t, Lt // t = Lt
@@ -237,21 +225,20 @@ rasterizeFT_asm:
bne .scanline_block_8px
.scanline_end:
ldmia sp, {sLdx, sLdt, sRdx, sRdt}
add Lx, sLdx
add Lt, sLdt
add Rx, sRdx
add Rt, sRdt
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
fiq_on
add Lx, Ldx
add Rx, Rdx
add Lt, Ldt
add Rt, Rdt
subs h, #1
fiq_off_ne
bne .scanline_start
add tmp, sp, #SP_L
ldmia tmp, {L, R, Lh, Rh}
b .loop
.exit:
add sp, #SP_SIZE // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r11, pc}
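Lh and Rh used to be saved to the stack around the scanline loop; now they travel as the two 16-bit halves of a single FIQ-banked register (LRh on r12), so both counters cross the fiq_off boundary without touching memory, and fiq_off_ne keeps the bank switched on for the next iteration only while the loop continues. A pack/unpack sketch, with r2/r3/r12 standing in for Lh/Rh/LRh:

    orr r12, r3, r2, lsl #16  // LRh = (Lh << 16) | Rh
    lsr r2, r12, #16          // Lh = LRh >> 16
    lsl r3, r12, #16
    lsr r3, r3, #16           // Rh = LRh & 0xFFFF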

View File

@@ -1,69 +1,55 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
LMAP .req r3
arg_pixel .req r0 // arg
arg_L .req r1 // arg
arg_R .req r2 // arg
TILE .req r4
tmp .req r5
N .req r6
Lh .req r7
Rh .req r8
N .req r0
tmp .req r1
Lx .req r2
Rx .req r3
Lt .req r4
Rt .req r5
t .req r6
dtdx .req r7
Lx .req r9
Rx .req r10
Lt .req r11
Rt .req r12
h .req lr
indexA .req r8
indexB .req r9
LMAP .req r10
TILE .req r11
pixel .req r12
width .req lr
// FIQ regs
Ldx .req r8
Rdx .req r9
Ldt .req r10
Rdt .req r11
LRh .req r12
L .req r13
R .req r14
Rh .req LRh
Lh .req t
h .req N
ptr .req tmp
Ldx .req h
Rdx .req h
Ldt .req h
Rdt .req h
indexA .req Lh
indexB .req Rh
Rxy .req tmp
Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
inv .req Lh
width .req N
t .req L
dtdx .req R
inv .req indexA
duv .req indexB
dtmp .req t
duv .req R
du .req L
dv .req R
Ldu .req N
Ldv .req h
Rdu .req N
Rdv .req h
Ltmp .req N
Rtmp .req N
Rti .req indexB
sLdx .req tmp
sLdt .req N
sRdx .req Lh
sRdt .req Rh
SP_LDX = 0
SP_LDT = 4
SP_RDX = 8
SP_RDT = 12
SP_L = 16
SP_R = 20
SP_LH = 24
SP_RH = 28
SP_SIZE = 32
.macro PUT_PIXELS
tex indexA, t
add t, dtdx, lsl #1
@@ -76,22 +62,28 @@ SP_SIZE = 32
.global rasterizeFTA_asm
rasterizeFTA_asm:
stmfd sp!, {r4-r11, lr}
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
mov pixel, arg_pixel
mov LMAP, #LMAP_ADDR
ldrb tmp, [L, #VERTEX_G]
add LMAP, tmp, lsl #8 // tmp = (L->v.g << 8)
ldrb t, [arg_L, #VERTEX_G]
add LMAP, t, lsl #8 // LMAP = (L->v.g << 8)
ldr TILE, =gTile
ldr TILE, [TILE]
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
fiq_on
mov L, arg_L
mov R, arg_R
mov LRh, #0 // Lh = 0
.loop:
lsr Lh, LRh, #16
lsl Rh, LRh, #16
lsr Rh, Rh, #16
cmp Lh, #0
bne .calc_left_end // if (Lh != 0) end with left
bgt .calc_left_end // if (Lh != 0) end with left
.calc_left_start:
ldrsb N, [L, #VERTEX_PREV] // N = L + L->prev
@@ -114,16 +106,14 @@ rasterizeFTA_asm:
ldrsh Ldx, [L, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
str Ldx, [sp, #SP_LDX] // store Ldx to stack
ldr Ldt, [L, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ldu, Ldv, tmp
str Ldt, [sp, #SP_LDT] // store Ldt to stack
scaleUV Ldt, Ltmp, tmp
.calc_left_end:
cmp Rh, #0
bne .calc_right_end // if (Rh != 0) end with right
bgt .calc_right_end // if (Rh != 0) end with right
.calc_right_start:
ldrsb N, [R, #VERTEX_NEXT] // N = R + R->next
@@ -131,7 +121,7 @@ rasterizeFTA_asm:
ldr Rxy, [R, #VERTEX_X] // Rxy = (R->v.y << 16) | (R->v.x)
ldrsh Ry2, [N, #VERTEX_Y] // Ry2 = N->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = N->v.y - R->v.y
subs Rh, Ry2, Rxy, asr #16 // Rh = Ry2 - (Rxy >> 16)
blt .exit // if (Rh < 0) return
ldrne Rt, [R, #VERTEX_T] // Rt = R->t
mov R, N // R = N
@@ -146,12 +136,10 @@ rasterizeFTA_asm:
ldrsh Rdx, [R, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
str Rdx, [sp, #SP_RDX] // store Rdx to stack
ldr Rdt, [R, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rdu, Rdv, tmp
str Rdt, [sp, #SP_RDT] // store Rdt to stack
scaleUV Rdt, Rtmp, tmp
.calc_right_end:
cmp Rh, Lh // if (Rh < Lh)
@@ -160,8 +148,9 @@ rasterizeFTA_asm:
sub Lh, h // Lh -= h
sub Rh, h // Rh -= h
add tmp, sp, #SP_L
stmia tmp, {L, R, Lh, Rh}
orr LRh, Rh, Lh, lsl #16
fiq_off
.scanline_start:
asr tmp, Lx, #16 // x1 = (Lx >> 16)
@@ -173,7 +162,7 @@ rasterizeFTA_asm:
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // duv = Rt - Lt
scaleUV dtdx, du, dv, inv
scaleUV dtdx, dtmp, inv
mov t, Lt // t = Lt
@@ -241,21 +230,20 @@ rasterizeFTA_asm:
bne .scanline_block_8px
.scanline_end:
ldmia sp, {sLdx, sLdt, sRdx, sRdt}
add Lx, sLdx
add Lt, sLdt
add Rx, sRdx
add Rt, sRdt
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
fiq_on
add Lx, Ldx
add Rx, Rdx
add Lt, Ldt
add Rt, Rdt
subs h, #1
fiq_off_ne
bne .scanline_start
add tmp, sp, #SP_L
ldmia tmp, {L, R, Lh, Rh}
b .loop
.exit:
add sp, #SP_SIZE // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r11, pc}

View File

@@ -1,20 +1,22 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
p .req r4
w .req r5
indexA .req r6
indexB .req r12
shade .req lr
pixel .req r0 // arg
L .req r1 // arg
R .req r2 // arg
p .req r3
// FIQ regs
w .req r8
indexA .req r9
indexB .req r10
shade .req r11
width .req L
height .req R
LMAP .req shade
.global rasterizeFillS_asm
rasterizeFillS_asm:
stmfd sp!, {r4-r6, lr}
fiq_on
add R, #VERTEX_SIZEOF
ldrsh p, [L, #VERTEX_X]
@@ -68,4 +70,5 @@ rasterizeFillS_asm:
subs height, #1
bne .loop
ldmfd sp!, {r4-r6, pc}
fiq_off
bx lr

View File

@@ -1,39 +1,37 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
arg_pixel .req r0 // arg
arg_L .req r1 // arg
arg_R .req r2 // arg
Lh .req r3
Rh .req r4
Lx .req r5
Rx .req r6
Lg .req r7
Rg .req r8
Lt .req r9
Rt .req r10
tmp .req r11
N .req r12
N .req r0
tmp .req r1
Lx .req r2
Rx .req r3
Lg .req r4
Rg .req r5
Lt .req r6
Rt .req r7
L .req r8
R .req r9
Lh .req r10
Rh .req r11
pixel .req r12
TILE .req lr
// FIQ regs
Ldx .req r8
Rdx .req r9
Ldg .req r10
Rdg .req r11
Ldt .req r12
Rdt .req r13
h .req N
LMAP .req tmp
Ldx .req h
Rdx .req h
Ldg .req h
Rdg .req h
Ldt .req h
Rdt .req h
indexA .req Lh
indexB .req tmp
@@ -52,57 +50,37 @@ dgdx .req L
t .req Lt
dtdx .req R
du .req L
dv .req R
dtmp .req L
Ldu .req TILE
Ldv .req N
Rdu .req TILE
Rdv .req N
Ltmp .req N
Rtmp .req N
Rti .req tmp
Rgi .req tmp
sLdx .req L
sLdg .req R
sLdt .req Lh
sRdx .req Rh
sRdg .req tmp
sRdt .req tmp // not enough regs for one ldmia
SP_LDX = 0
SP_LDG = 4
SP_LDT = 8
SP_RDX = 12
SP_RDG = 16
SP_RDT = 20
SP_L = 24
SP_R = 28
SP_LH = 32
SP_RH = 36
SP_SIZE = 40
SP_TILE = SP_SIZE
SP_TILE = 0
SP_SIZE = 4
.macro PUT_PIXELS
bic LMAP, g, #255
add g, dgdx
tex indexA, t
lit indexA
add t, dtdx, lsl #1
//orr indexA, indexA, lsl #8
strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
add g, dgdx, lsl #1
add t, dtdx, lsl #1
.endm
.global rasterizeGT_asm
rasterizeGT_asm:
ldr r3, =gTile
ldr r3, [r3]
stmfd sp!, {r3-r11, lr}
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
mov pixel, arg_pixel
mov L, arg_L
mov R, arg_R
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
@@ -132,21 +110,20 @@ rasterizeGT_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
fiq_on
ldrsh Ldx, [N, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
str Ldx, [sp, #SP_LDX] // store Ldx to stack
ldrb Ldg, [L, #VERTEX_G]
ldrb Ldg, [N, #VERTEX_G]
sub Ldg, Lg, lsr #8
mul Ldg, tmp // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part
str Ldg, [sp, #SP_LDG] // store Ldg to stack
ldr Ldt, [L, #VERTEX_T]
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ldu, Ldv, tmp
str Ldt, [sp, #SP_LDT] // store Ldt to stack
scaleUV Ldt, Ltmp, tmp
fiq_off
.calc_left_end:
cmp Rh, #0
@@ -172,21 +149,20 @@ rasterizeGT_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
fiq_on
ldrsh Rdx, [N, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
str Rdx, [sp, #SP_RDX] // store Rdx to stack
ldrb Rdg, [R, #VERTEX_G]
ldrb Rdg, [N, #VERTEX_G]
sub Rdg, Rg, lsr #8
mul Rdg, tmp // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part
str Rdg, [sp, #SP_RDG] // store Rdg to stack
ldr Rdt, [R, #VERTEX_T]
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rdu, Rdv, tmp
str Rdt, [sp, #SP_RDT] // store Rdt to stack
scaleUV Rdt, Rtmp, tmp
fiq_off
.calc_right_end:
orr Lg, #LMAP_ADDR
@@ -200,27 +176,26 @@ rasterizeGT_asm:
ldr TILE, [sp, #SP_TILE]
add tmp, sp, #SP_L
stmia tmp, {L, R, Lh, Rh}
stmfd sp!, {L, R, Lh, Rh}
.scanline_start:
asr Lh, Lx, #16 // x1 = (Lx >> 16)
rsbs width, Lh, Rx, asr #16 // width = (Rx >> 16) - x1
ble .scanline_end_fast // if (width <= 0) go next scanline
stmfd sp!, {Lx, Lg, Lt}
asr Lx, Lx, #16 // x1 = (Lx >> 16)
rsbs width, Lx, Rx, asr #16 // width = (Rx >> 16) - x1
ble .scanline_end // if (width <= 0) go next scanline
add ptr, pixel, Lx // ptr = pixel + x1
add ptr, pixel, Lx, asr #16 // ptr = pixel + x1
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, du, dv, inv
scaleUV dtdx, dtmp, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #15 // dgdx >>= 15
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)
// 2 bytes alignment (VRAM write requirement)
@@ -229,18 +204,19 @@ rasterizeGT_asm:
beq .align_right
bic LMAP, g, #255
add g, dgdx, asr #1
tex indexA, t
lit indexA
ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte)
orr indexB, indexA, lsl #8
strh indexB, [ptr], #2
add t, dtdx
subs width, #1 // width--
beq .scanline_end // if (width == 0)
add g, dgdx
add t, dtdx
.align_right:
tst width, #1
beq .align_block_4px
@@ -248,7 +224,7 @@ rasterizeGT_asm:
sub Rti, Rt, dtdx
tex indexA, Rti
sub Rgi, Rg, dgdx, asr #1
sub Rgi, Rg, dgdx
bic LMAP, Rgi, #255
lit indexA
@@ -289,34 +265,25 @@ rasterizeGT_asm:
.scanline_end:
ldmfd sp!, {Lx, Lg, Lt}
/* TEST FIQ
mrs r1, cpsr // save current program status reg
msr cpsr, #0x11 // switch to FIQ mode with extra r8-r14 regs
mov r8, #0 // trash FIQ regs and
mov r10, #0 // it shouldn't affect normal mode regs
// mov r11, r11
msr cpsr, r1 // restore current program status reg
*/
ldmia sp, {sLdx, sLdg, sLdt, sRdx, sRdg}
add Lx, sLdx
add Lg, sLdg
add Lt, sLdt
add Rx, sRdx
add Rg, sRdg
ldr sRdt, [sp, #SP_RDT]
add Rt, sRdt
.scanline_end_fast:
fiq_on
add Lx, Ldx
add Rx, Rdx
add Lg, Ldg
add Rg, Rdg
add Lt, Ldt
add Rt, Rdt
fiq_off
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
add tmp, sp, #SP_L
ldmia tmp, {L, R, Lh, Rh}
ldmfd sp!, {L, R, Lh, Rh}
b .loop
.exit:
add sp, #(SP_SIZE + 4) // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, TILE]
ldmfd sp!, {r4-r11, pc}
add sp, #SP_SIZE // revert reserved space for [TILE]
ldmfd sp!, {r4-r11, pc}

View File

@@ -1,39 +1,37 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
arg_pixel .req r0 // arg
arg_L .req r1 // arg
arg_R .req r2 // arg
Lh .req r3
Rh .req r4
Lx .req r5
Rx .req r6
Lg .req r7
Rg .req r8
Lt .req r9
Rt .req r10
tmp .req r11
N .req r12
N .req r0
tmp .req r1
Lx .req r2
Rx .req r3
Lg .req r4
Rg .req r5
Lt .req r6
Rt .req r7
L .req r8
R .req r9
Lh .req r10
Rh .req r11
pixel .req r12
TILE .req lr
// FIQ regs
Ldx .req r8
Rdx .req r9
Ldg .req r10
Rdg .req r11
Ldt .req r12
Rdt .req r13
h .req N
LMAP .req tmp
Ldx .req h
Rdx .req h
Ldg .req h
Rdg .req h
Ldt .req h
Rdt .req h
indexA .req Lh
indexB .req tmp
@@ -52,59 +50,39 @@ dgdx .req L
t .req Lt
dtdx .req R
dtmp .req L
duv .req R
du .req L
dv .req R
Ldu .req TILE
Ldv .req N
Rdu .req TILE
Rdv .req N
Ltmp .req N
Rtmp .req N
Rti .req tmp
Rgi .req tmp
sLdx .req L
sLdg .req R
sLdt .req Lh
sRdx .req Rh
sRdg .req tmp
sRdt .req tmp // not enough regs for one ldmia
SP_LDX = 0
SP_LDG = 4
SP_LDT = 8
SP_RDX = 12
SP_RDG = 16
SP_RDT = 20
SP_L = 24
SP_R = 28
SP_LH = 32
SP_RH = 36
SP_SIZE = 40
SP_TILE = SP_SIZE
SP_TILE = 0
SP_SIZE = 4
.macro PUT_PIXELS
bic LMAP, g, #255
add g, dgdx
tex indexA, t
add t, dtdx, lsl #1
cmp indexA, #0
ldrneb indexA, [LMAP, indexA]
strneb indexA, [ptr]
add ptr, #2
add g, dgdx, lsl #1
add t, dtdx, lsl #1
.endm
.global rasterizeGTA_asm
rasterizeGTA_asm:
ldr r3, =gTile
ldr r3, [r3]
stmfd sp!, {r3-r11, lr}
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
mov pixel, arg_pixel
mov L, arg_L
mov R, arg_R
mov Lh, #0 // Lh = 0
mov Rh, #0 // Rh = 0
@@ -134,21 +112,20 @@ rasterizeGTA_asm:
divLUT tmp, Lh // tmp = FixedInvU(Lh)
ldrsh Ldx, [L, #VERTEX_X]
fiq_on
ldrsh Ldx, [N, #VERTEX_X]
sub Ldx, Lx, asr #16
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
str Ldx, [sp, #SP_LDX] // store Ldx to stack
ldrb Ldg, [L, #VERTEX_G]
ldrb Ldg, [N, #VERTEX_G]
sub Ldg, Lg, lsr #8
mul Ldg, tmp // Ldg = tmp * (N->v.g - Lg)
asr Ldg, #8 // 8-bit for fractional part
str Ldg, [sp, #SP_LDG] // store Ldg to stack
ldr Ldt, [L, #VERTEX_T]
ldr Ldt, [N, #VERTEX_T]
sub Ldt, Lt // Ldt = N->v.t - Lt
scaleUV Ldt, Ldu, Ldv, tmp
str Ldt, [sp, #SP_LDT] // store Ldt to stack
scaleUV Ldt, Ltmp, tmp
fiq_off
.calc_left_end:
cmp Rh, #0
@@ -174,21 +151,20 @@ rasterizeGTA_asm:
divLUT tmp, Rh // tmp = FixedInvU(Rh)
ldrsh Rdx, [R, #VERTEX_X]
fiq_on
ldrsh Rdx, [N, #VERTEX_X]
sub Rdx, Rx, asr #16
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
str Rdx, [sp, #SP_RDX] // store Rdx to stack
ldrb Rdg, [R, #VERTEX_G]
ldrb Rdg, [N, #VERTEX_G]
sub Rdg, Rg, lsr #8
mul Rdg, tmp // Rdg = tmp * (N->v.g - Rg)
asr Rdg, #8 // 8-bit for fractional part
str Rdg, [sp, #SP_RDG] // store Rdg to stack
ldr Rdt, [R, #VERTEX_T]
ldr Rdt, [N, #VERTEX_T]
sub Rdt, Rt // Rdt = N->v.t - Rt
scaleUV Rdt, Rdu, Rdv, tmp
str Rdt, [sp, #SP_RDT] // store Rdt to stack
scaleUV Rdt, Rtmp, tmp
fiq_off
.calc_right_end:
orr Lg, #LMAP_ADDR
@@ -202,27 +178,26 @@ rasterizeGTA_asm:
ldr TILE, [sp, #SP_TILE]
add tmp, sp, #SP_L
stmia tmp, {L, R, Lh, Rh}
stmfd sp!, {L, R, Lh, Rh}
.scanline_start:
asr Lh, Lx, #16 // x1 = (Lx >> 16)
rsbs width, Lh, Rx, asr #16 // width = (Rx >> 16) - x1
ble .scanline_end_fast // if (width <= 0) go next scanline
stmfd sp!, {Lx, Lg, Lt}
asr Lx, Lx, #16 // x1 = (Lx >> 16)
rsbs width, Lx, Rx, asr #16 // width = (Rx >> 16) - x1
ble .scanline_end // if (width <= 0) go next scanline
add ptr, pixel, Lx // ptr = pixel + x1
add ptr, pixel, Lx, asr #16 // ptr = pixel + x1
divLUT inv, width // inv = FixedInvU(width)
sub dtdx, Rt, Lt // dtdx = Rt - Lt
scaleUV dtdx, du, dv, inv
scaleUV dtdx, dtmp, inv
// t == Lt (alias)
sub dgdx, Rg, Lg // dgdx = Rg - Lg
mul dgdx, inv // dgdx *= FixedInvU(width)
asr dgdx, #15 // dgdx >>= 15
asr dgdx, #16 // dgdx >>= 16
// g == Lg (alias)
// 2 bytes alignment (VRAM write requirement)
@@ -244,8 +219,8 @@ rasterizeGTA_asm:
.skip_left:
add ptr, #1
add g, dgdx
add t, dtdx
add g, dgdx, asr #1
subs width, #1 // width--
beq .scanline_end // if (width == 0)
@@ -305,26 +280,24 @@ rasterizeGTA_asm:
.scanline_end:
ldmfd sp!, {Lx, Lg, Lt}
ldmia sp, {sLdx, sLdg, sLdt, sRdx, sRdg}
add Lx, sLdx
add Lg, sLdg
add Lt, sLdt
add Rx, sRdx
add Rg, sRdg
ldr sRdt, [sp, #SP_RDT]
add Rt, sRdt
.scanline_end_fast:
fiq_on
add Lx, Ldx
add Rx, Rdx
add Lg, Ldg
add Rg, Rdg
add Lt, Ldt
add Rt, Rdt
fiq_off
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
subs h, #1
bne .scanline_start
add tmp, sp, #SP_L
ldmia tmp, {L, R, Lh, Rh}
ldmfd sp!, {L, R, Lh, Rh}
b .loop
.exit:
add sp, #(SP_SIZE + 4) // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, TILE]
ldmfd sp!, {r4-r11, pc}
add sp, #SP_SIZE // revert reserved space for [TILE]
ldmfd sp!, {r4-r11, pc}

View File

@@ -1,8 +1,8 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
pixel .req r0 // arg
L .req r1 // arg
R .req r2 // arg
tmp .req r12
index .req L
width .req R

View File

@@ -1,8 +1,8 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
pixel .req r0 // arg
L .req r1 // arg
R .req r2 // arg
tmp .req r12
index .req L
height .req R

View File

@@ -1,30 +1,33 @@
#include "common_asm.inc"
pixel .req r0
L .req r1
R .req r2
pixel .req r0 // arg
L .req r1 // arg
R .req r2 // arg
LMAP .req r3
Lh .req r4
Rh .req r5
Lx .req r6
Rx .req r7
// FIQ regs
Ldx .req r8
Rdx .req r9
N .req r10
tmp .req r11
pair .req r12
width .req lr
width .req r13
indexA .req r14
h .req N
Rxy .req tmp
Ry2 .req Rh
Lxy .req tmp
Ly2 .req Lh
indexA .req Lh
indexB .req pair
.global rasterizeS_asm
rasterizeS_asm:
stmfd sp!, {r4-r11, lr}
stmfd sp!, {r4-r7}
fiq_on
mov LMAP, #LMAP_ADDR
add LMAP, #0x1A00
@@ -88,8 +91,6 @@ rasterizeS_asm:
sub Lh, h // Lh -= h
sub Rh, h // Rh -= h
stmfd sp!, {Lh}
.scanline_start:
asr tmp, Lx, #16 // x1 = (Lx >> 16)
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
@@ -142,8 +143,9 @@ rasterizeS_asm:
subs h, #1
bne .scanline_start
ldmfd sp!, {Lh}
b .loop
.exit:
ldmfd sp!, {r4-r11, pc}
fiq_off
ldmfd sp!, {r4-r7}
bx lr

View File

@@ -2,4 +2,4 @@
.global rasterize_dummy
rasterize_dummy:
mov pc, lr
bx lr
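mov pc, lr copies the return address but keeps the current ARM/Thumb state; bx lr also examines bit 0 of lr and switches state, which matters on the GBA's ARMv4T core whenever the caller might be Thumb code (a Thumb-mode bl stores the return address with bit 0 set). A sketch, with a hypothetical label:

thumb_safe_return:
    bx lr    // reads bit 0 of lr, switches ARM<->Thumb as needed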

View File

@@ -1,16 +1,17 @@
#include "common_asm.inc"
x .req r0
y .req r1
z .req r2
r .req r3
mx .req r4
my .req r5
mz .req r6
vx .req r7
vy .req r8
vz .req r12
m .req lr
x .req r0 // arg
y .req r1 // arg
z .req r2 // arg
r .req r3 // arg
// FIQ regs
mx .req r8
my .req r9
mz .req r10
vx .req r11
vy .req r12
vz .req r13
m .req r14
tmp .req m
vp .req m
vMinXY .req z
@@ -23,7 +24,7 @@ rMaxY .req y
.global sphereIsVisible_asm
sphereIsVisible_asm:
stmfd sp!, {r4-r8, lr}
fiq_on
ldr m, =gMatrixPtr
ldr m, [m]
@@ -75,10 +76,10 @@ sphereIsVisible_asm:
bgt .fail
mov r0, #1
ldmfd sp!, {r4-r8, lr}
fiq_off
bx lr
.fail:
mov r0, #0
ldmfd sp!, {r4-r8, lr}
fiq_off
bx lr

View File

@@ -1,24 +1,36 @@
#include "common_asm.inc"
vertices .req r0
count .req r1
intensity .req r2
m .req r3
vg .req intensity
vx .req r4
vy .req r5
vz .req r6
mx .req r7
my .req r8
mz .req r9
x .req r10
y .req r11
z .req r12
res .req lr
vertices .req r0 // arg
count .req r1 // arg
intensity .req r2 // arg
vx .req intensity
vy .req r3
vz .req r4
x .req r5
y .req r6
z .req vx
mx0 .req r7
ambient .req vx
mx2 .req r8
my2 .req r9
mz2 .req r10
mw2 .req r11
res .req r12
vg .req lr
// FIQ regs
my0 .req r8
mz0 .req r9
mw0 .req r10
mx1 .req r11
my1 .req r12
mz1 .req r13
mw1 .req r14
ambient .req vz
tmp .req vy
dz .req vx
dz .req vz
m .req vz
.global transformMesh_asm
transformMesh_asm:
@@ -38,6 +50,9 @@ transformMesh_asm:
ldr m, =gMatrixPtr
ldr m, [m]
fiq_on
ldmia m!, {mx0, my0, mz0, mw0, mx1, my1, mz1, mw1}
ldmia m, {mx2, my2, mz2, mw2}^
.loop:
// unpack vertex
@@ -45,30 +60,26 @@ transformMesh_asm:
ldrsh vy, [vertices], #2
ldrsh vz, [vertices], #2
bic vg, #CLIP_MASK // clear clipping flags
// transform x
ldmia m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mla x, mx0, vx, mw0
mla x, my0, vy, x
mla x, mz0, vz, x
asr x, #FIXED_SHIFT
// transform y
ldmia m!, {mx, my, mz, y}
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mla y, mx1, vx, mw1
mla y, my1, vy, y
mla y, mz1, vz, y
asr y, #FIXED_SHIFT
fiq_off
// transform z
ldmia m!, {mx, my, mz, z}
mla z, mx, vx, z
mla z, my, vy, z
mla z, mz, vz, z
mla z, mx2, vx, mw2
mla z, my2, vy, z
mla z, mz2, vz, z
asr z, #FIXED_SHIFT
sub m, #(12 * 4) // restore matrix ptr
bic vg, #CLIP_MASK // clear clipping flags
// z clipping
cmp z, #VIEW_MIN
@@ -102,6 +113,7 @@ transformMesh_asm:
strh vg, [res], #2
subs count, #1
fiq_on_ne
bne .loop
ldmfd sp!, {r4-r11, pc}
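transformMesh now preloads all twelve matrix words into registers spread across both banks: rows 0-1 land in r7 plus the FIQ-banked r8-r14, and the trailing ldmia m, {mx2, my2, mz2, mw2}^ uses the LDM user-bank form — the ^ suffix with no pc in the list — to drop row 2 into the normal r8-r11 while the CPU is still in FIQ mode. A sketch of that trick (privileged mode assumed):

    msr   cpsr, #0x11        // FIQ mode: r8-r14 banked
    ldmia r0!, {r7, r8-r14}  // eight words -> r7 + FIQ r8-r14
    ldmia r0,  {r8-r11}^     // four words -> User-bank r8-r11
    msr   cpsr, #0x1F        // System mode: those r8-r11 visible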

View File

@@ -1,33 +1,41 @@
#include "common_asm.inc"
vertices .req r0
count .req r1
m .req r2
v .req r3
vx .req r4
vy .req r5
vz .req r6
vg .req v
mx .req r7
my .req r8
mz .req r9
x .req r10
y .req r11
z .req r12
res .req lr
t .req y
vertices .req r0 // arg
count .req r1 // arg
vx .req r2
vy .req r3
vz .req r4
x .req vx
y .req r5
z .req r6
mx0 .req r7
spMinXY .req x
spMaxXY .req y
mx2 .req r8
my2 .req r9
mz2 .req r10
mw2 .req r11
res .req r12
vg .req lr
mask .req x
vp .req vx
minXY .req vx
maxXY .req vy
// FIQ regs
my0 .req r8
mz0 .req r9
mw0 .req r10
mx1 .req r11
my1 .req r12
mz1 .req r13
mw1 .req r14
tmp .req my
dz .req mz
fog .req mz
m .req vx
v .req vg
mask .req y
minXY .req vy
maxXY .req vz
tmp .req vy
dz .req vz
fog .req vz
SP_MINXY = 0
SP_MAXXY = 4
@@ -41,18 +49,18 @@ transformRoom_asm:
ldr res, [res]
add res, #VERTEX_G
ldr tmp, =viewportRel
ldmia tmp, {minXY, maxXY}
stmfd sp!, {minXY, maxXY}
mov mask, #(0xFF << 10)
ldr m, =gMatrixPtr
ldr m, [m]
ldr vp, =viewportRel
ldmia vp, {spMinXY, spMaxXY}
stmfd sp!, {spMinXY, spMaxXY}
// preload mask, matrix and z-row
mov mask, #(0xFF << 10)
add m, #(12 * 4)
ldmdb m!, {mx, my, mz, z}
fiq_on
ldmia m!, {mx0, my0, mz0, mw0, mx1, my1, mz1, mw1}
ldmia m, {mx2, my2, mz2, mw2}^
fiq_off
.loop:
// unpack vertex
@@ -63,33 +71,33 @@ transformRoom_asm:
and vx, mask, v, lsl #10
// transform z
mla t, mx, vx, z
mla t, my, vy, t
mla t, mz, vz, t
asr t, #FIXED_SHIFT
mla z, mx2, vx, mw2
mla z, my2, vy, z
mla z, mz2, vz, z
asr z, #FIXED_SHIFT
// skip if vertex is out of z-range
add t, #VIEW_OFF
cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
add z, #VIEW_OFF
cmp z, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
and vg, mask, v, lsr #14
sub z, t, #VIEW_OFF
sub z, #VIEW_OFF
fiq_on
// transform y
ldmdb m!, {mx, my, mz, y}
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mla y, mx1, vx, mw1
mla y, my1, vy, y
mla y, mz1, vz, y
asr y, #FIXED_SHIFT
// transform x
ldmdb m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mla x, mx0, vx, mw0
mla x, my0, vy, x
mla x, mz0, vz, x
asr x, #FIXED_SHIFT
fiq_off
// fog
cmp z, #FOG_MIN
@@ -145,11 +153,7 @@ transformRoom_asm:
strh y, [res, #-4]
strh z, [res, #-2]
// preload mask, matrix and z-row
mov mask, #(0xFF << 10)
add m, #(12 * 4)
ldmdb m!, {mx, my, mz, z}
.skip:
strh vg, [res], #8

View File

@@ -1,46 +1,57 @@
#include "common_asm.inc"
vertices .req r0
count .req r1
m .req r2
v .req r3
vx .req r4
vy .req r5
vz .req r6
vg .req v
mx .req r7
my .req r8
mz .req r9
x .req r10
y .req r11
z .req r12
res .req lr
t .req y
vertices .req r0 // arg
count .req r1 // arg
vx .req r2
vy .req r3
vz .req r4
x .req vx
y .req r5
z .req r6
mx0 .req r7
spMinXY .req mx
spMaxXY .req my
spFrame .req mz
spCaustLUT .req x
spRandLUT .req y
mx2 .req r8
my2 .req r9
mz2 .req r10
mw2 .req r11
res .req r12
vg .req lr
mask .req x
vp .req vx
minXY .req vx
maxXY .req vy
// FIQ regs
my0 .req r8
mz0 .req r9
mw0 .req r10
mx1 .req r11
my1 .req r12
mz1 .req r13
mw1 .req r14
dz .req mz
fog .req mz
m .req vx
v .req vg
mask .req y
frame .req vx
minXY .req vy
maxXY .req vz
tmp .req vy
dz .req vz
fog .req vz
frame .req vy
caust .req vy
rand .req vz
tmp .req mx
spMinXY .req vx
spMaxXY .req vy
spRandLUT .req vz
spFrame .req y
spCaustLUT .req z
SP_MINXY = 0
SP_MAXXY = 4
SP_FRAME = 8
SP_CAUST = 12
SP_RAND = 16
SP_RAND = 8
SP_FRAME = 12
SP_CAUST = 16
SP_SIZE = 20
.global transformRoomUW_asm
@@ -51,11 +62,8 @@ transformRoomUW_asm:
ldr res, [res]
add res, #VERTEX_G
ldr m, =gMatrixPtr
ldr m, [m]
ldr vp, =viewportRel
ldmia vp, {spMinXY, spMaxXY}
ldr tmp, =viewportRel
ldmia tmp, {spMinXY, spMaxXY}
ldr spFrame, =gCausticsFrame
ldr spFrame, [spFrame]
@@ -63,12 +71,16 @@ transformRoomUW_asm:
ldr spCaustLUT, =gCaustics
ldr spRandLUT, =gRandTable
stmfd sp!, {spMinXY, spMaxXY, spFrame, spCaustLUT, spRandLUT}
stmfd sp!, {spMinXY, spMaxXY, spRandLUT, spFrame, spCaustLUT}
// preload mask, matrix and z-row
mov mask, #(0xFF << 10)
add m, #(12 * 4)
ldmdb m!, {mx, my, mz, z}
ldr m, =gMatrixPtr
ldr m, [m]
fiq_on
ldmia m!, {mx0, my0, mz0, mw0, mx1, my1, mz1, mw1}
ldmia m, {mx2, my2, mz2, mw2}^
fiq_off
.loop:
// unpack vertex
@@ -79,41 +91,42 @@ transformRoomUW_asm:
and vx, mask, v, lsl #10
// transform z
mla t, mx, vx, z
mla t, my, vy, t
mla t, mz, vz, t
asr t, #FIXED_SHIFT
mla z, mx2, vx, mw2
mla z, my2, vy, z
mla z, mz2, vz, z
asr z, #FIXED_SHIFT
// skip if vertex is out of z-range
add t, #VIEW_OFF
cmp t, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
add z, #VIEW_OFF
cmp z, #(VIEW_OFF + VIEW_OFF + VIEW_MAX)
movhi vg, #(CLIP_NEAR + CLIP_FAR)
bhi .skip
and vg, mask, v, lsr #14
sub z, t, #VIEW_OFF
sub z, #VIEW_OFF
fiq_on
// transform y
ldmdb m!, {mx, my, mz, y}
mla y, mx, vx, y
mla y, my, vy, y
mla y, mz, vz, y
mla y, mx1, vx, mw1
mla y, my1, vy, y
mla y, mz1, vz, y
asr y, #FIXED_SHIFT
// transform x
ldmdb m!, {mx, my, mz, x}
mla x, mx, vx, x
mla x, my, vy, x
mla x, mz, vz, x
mla x, mx0, vx, mw0
mla x, my0, vy, x
mla x, mz0, vz, x
asr x, #FIXED_SHIFT
fiq_off
// caustics
add tmp, sp, #SP_FRAME
ldmia tmp, {frame, caust, rand}
ldr rand, [sp, #SP_RAND]
and tmp, count, #(MAX_RAND_TABLE - 1)
ldr rand, [rand, tmp, lsl #2]
ldr frame, [sp, #SP_FRAME]
add rand, frame
and rand, #(MAX_CAUSTICS - 1)
ldr caust, [sp, #SP_CAUST]
ldr caust, [caust, rand, lsl #2]
add vg, caust, lsl #5
@@ -171,11 +184,7 @@ transformRoomUW_asm:
strh y, [res, #-4]
strh z, [res, #-2]
// preload mask, matrix and z-row
mov mask, #(0xFF << 10)
add m, #(12 * 4)
ldmdb m!, {mx, my, mz, z}
.skip:
strh vg, [res], #8