mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-08 14:16:52 +02:00
#368 GBA rasterizer optimization
This commit is contained in:
@@ -72,3 +72,13 @@ MAX_INT32 = 0x7FFFFFFF
|
|||||||
mlas tmp, vx1, vy2, tmp
|
mlas tmp, vx1, vy2, tmp
|
||||||
bgt \skip
|
bgt \skip
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
.macro tex index, uv
|
||||||
|
and \index, \uv, #0xFF00
|
||||||
|
orr \index, \uv, lsr #24 // index = t.v * 256 + t.u
|
||||||
|
ldrb \index, [TILE, \index]
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro lit index
|
||||||
|
ldrb \index, [LMAP, \index]
|
||||||
|
.endm
|
||||||
|
@@ -11,11 +11,11 @@ N .req r6
|
|||||||
Lh .req r7
|
Lh .req r7
|
||||||
Rh .req r8
|
Rh .req r8
|
||||||
|
|
||||||
Lx .req ip
|
Lx .req r9
|
||||||
Rx .req lr
|
Rx .req r10
|
||||||
Lt .req r9
|
Lt .req r11
|
||||||
Rt .req r10
|
Rt .req r12
|
||||||
h .req r11
|
h .req lr
|
||||||
|
|
||||||
Ldx .req h
|
Ldx .req h
|
||||||
Rdx .req h
|
Rdx .req h
|
||||||
@@ -49,20 +49,21 @@ Rduv .req h
|
|||||||
Rdu .req N
|
Rdu .req N
|
||||||
Rdv .req h
|
Rdv .req h
|
||||||
|
|
||||||
|
sLdx .req tmp
|
||||||
|
sLdt .req N
|
||||||
|
sRdx .req Lh
|
||||||
|
sRdt .req Rh
|
||||||
|
|
||||||
SP_LDX = 0
|
SP_LDX = 0
|
||||||
SP_LDT = 4
|
SP_LDT = 4
|
||||||
SP_RDX = 8
|
SP_RDX = 8
|
||||||
SP_RDT = 12
|
SP_RDT = 12
|
||||||
|
|
||||||
.macro PUT_PIXELS
|
.macro PUT_PIXELS
|
||||||
and indexA, t, #0xFF00
|
tex indexA, t
|
||||||
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
|
|
||||||
ldrb indexA, [TILE, indexA]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
and indexB, t, #0xFF00
|
tex indexB, t
|
||||||
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
|
|
||||||
ldrb indexB, [TILE, indexB]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
||||||
@@ -78,7 +79,7 @@ SP_RDT = 12
|
|||||||
|
|
||||||
.global rasterizeFTA_mode4_asm
|
.global rasterizeFTA_mode4_asm
|
||||||
rasterizeFTA_mode4_asm:
|
rasterizeFTA_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
||||||
|
|
||||||
mov LMAP, #LMAP_ADDR
|
mov LMAP, #LMAP_ADDR
|
||||||
@@ -273,17 +274,12 @@ rasterizeFTA_mode4_asm:
|
|||||||
bne .scanline_block_8px
|
bne .scanline_block_8px
|
||||||
|
|
||||||
.scanline_end:
|
.scanline_end:
|
||||||
ldr tmp, [sp, #(SP_LDX + 16)]
|
add tmp, sp, #16
|
||||||
add Lx, tmp // Lx += Ldx from stack
|
ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
|
||||||
|
add Lx, sLdx
|
||||||
ldr tmp, [sp, #(SP_LDT + 16)]
|
add Lt, sLdt
|
||||||
add Lt, tmp // Lt += Ldt from stack
|
add Rx, sRdx
|
||||||
|
add Rt, sRdt
|
||||||
ldr tmp, [sp, #(SP_RDX + 16)]
|
|
||||||
add Rx, tmp // Rx += Rdx from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
|
||||||
add Rt, tmp // Rt += Rdt from stack
|
|
||||||
|
|
||||||
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
||||||
|
|
||||||
@@ -295,4 +291,4 @@ rasterizeFTA_mode4_asm:
|
|||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -11,11 +11,11 @@ N .req r6
|
|||||||
Lh .req r7
|
Lh .req r7
|
||||||
Rh .req r8
|
Rh .req r8
|
||||||
|
|
||||||
Lx .req ip
|
Lx .req r9
|
||||||
Rx .req lr
|
Rx .req r10
|
||||||
Lt .req r9
|
Lt .req r11
|
||||||
Rt .req r10
|
Rt .req r12
|
||||||
h .req r11
|
h .req lr
|
||||||
|
|
||||||
Ldx .req h
|
Ldx .req h
|
||||||
Rdx .req h
|
Rdx .req h
|
||||||
@@ -49,39 +49,40 @@ Rduv .req h
|
|||||||
Rdu .req N
|
Rdu .req N
|
||||||
Rdv .req h
|
Rdv .req h
|
||||||
|
|
||||||
|
sLdx .req tmp
|
||||||
|
sLdt .req N
|
||||||
|
sRdx .req Lh
|
||||||
|
sRdt .req Rh
|
||||||
|
|
||||||
SP_LDX = 0
|
SP_LDX = 0
|
||||||
SP_LDT = 4
|
SP_LDT = 4
|
||||||
SP_RDX = 8
|
SP_RDX = 8
|
||||||
SP_RDT = 12
|
SP_RDT = 12
|
||||||
|
|
||||||
.macro PUT_PIXELS
|
.macro PUT_PIXELS
|
||||||
and indexA, t, #0xFF00
|
tex indexA, t
|
||||||
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
|
lit indexA
|
||||||
ldrb indexA, [TILE, indexA]
|
|
||||||
ldrb indexA, [LMAP, indexA]
|
|
||||||
|
|
||||||
#ifndef TEX_2PX
|
#ifndef TEX_2PX
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
and indexB, t, #0xFF00
|
tex indexB, t
|
||||||
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
|
lit indexB
|
||||||
ldrb indexB, [TILE, indexB]
|
|
||||||
ldrb indexB, [LMAP, indexB]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
orr indexA, indexB, lsl #8
|
orr indexA, indexB, lsl #8
|
||||||
|
strh indexA, [ptr], #2
|
||||||
#else
|
#else
|
||||||
add t, dtdx, lsl #1
|
add t, dtdx, lsl #1
|
||||||
|
|
||||||
//orr indexA, indexA, lsl #8
|
//orr indexA, indexA, lsl #8
|
||||||
|
strb indexA, [tmp], #2 // writing a byte to GBA VRAM will write a half word for free
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
strb indexA, [tmp], #2
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.global rasterizeFT_mode4_asm
|
.global rasterizeFT_mode4_asm
|
||||||
rasterizeFT_mode4_asm:
|
rasterizeFT_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
||||||
|
|
||||||
mov LMAP, #LMAP_ADDR
|
mov LMAP, #LMAP_ADDR
|
||||||
@@ -273,26 +274,21 @@ rasterizeFT_mode4_asm:
|
|||||||
bne .scanline_block_8px
|
bne .scanline_block_8px
|
||||||
|
|
||||||
.scanline_end:
|
.scanline_end:
|
||||||
ldr tmp, [sp, #(SP_LDX + 16)]
|
add tmp, sp, #16
|
||||||
add Lx, tmp // Lx += Ldx from stack
|
ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
|
||||||
|
add Lx, sLdx
|
||||||
|
add Lt, sLdt
|
||||||
|
add Rx, sRdx
|
||||||
|
add Rt, sRdt
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_LDT + 16)]
|
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
||||||
add Lt, tmp // Lt += Ldt from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDX + 16)]
|
|
||||||
add Rx, tmp // Rx += Rdx from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
|
||||||
add Rt, tmp // Rt += Rdt from stack
|
|
||||||
|
|
||||||
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
|
||||||
|
|
||||||
subs h, #1
|
subs h, #1
|
||||||
bne .scanline_start
|
bne .scanline_start
|
||||||
|
|
||||||
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
||||||
b .loop
|
b .loop
|
||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -6,14 +6,15 @@ R .req r2
|
|||||||
index .req r3
|
index .req r3
|
||||||
Lh .req r4
|
Lh .req r4
|
||||||
Rh .req r5
|
Rh .req r5
|
||||||
Lx .req ip
|
Lx .req r6
|
||||||
Rx .req lr
|
Rx .req r7
|
||||||
Ldx .req r6
|
Ldx .req r8
|
||||||
Rdx .req r7
|
Rdx .req r9
|
||||||
N .req r8
|
N .req r10
|
||||||
tmp .req r9
|
tmp .req r11
|
||||||
DIVLUT .req r10
|
DIVLUT .req r12
|
||||||
width .req r11
|
width .req lr
|
||||||
|
|
||||||
h .req N
|
h .req N
|
||||||
Ry1 .req tmp
|
Ry1 .req tmp
|
||||||
Ry2 .req Rh
|
Ry2 .req Rh
|
||||||
@@ -25,7 +26,7 @@ blocks .req DIVLUT
|
|||||||
|
|
||||||
.global rasterizeF_mode4_asm
|
.global rasterizeF_mode4_asm
|
||||||
rasterizeF_mode4_asm:
|
rasterizeF_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
|
|
||||||
mov LMAP, #LMAP_ADDR
|
mov LMAP, #LMAP_ADDR
|
||||||
|
|
||||||
@@ -136,4 +137,4 @@ rasterizeF_mode4_asm:
|
|||||||
b .loop
|
b .loop
|
||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -70,6 +70,13 @@ Rdv .req N
|
|||||||
Rti .req tmp
|
Rti .req tmp
|
||||||
Rgi .req tmp
|
Rgi .req tmp
|
||||||
|
|
||||||
|
sLdx .req L
|
||||||
|
sLdg .req R
|
||||||
|
sLdt .req Lh
|
||||||
|
sRdx .req Rh
|
||||||
|
sRdg .req tmp
|
||||||
|
sRdt .req N // not used in ldm due h collision
|
||||||
|
|
||||||
SP_LDX = 0
|
SP_LDX = 0
|
||||||
SP_LDG = 4
|
SP_LDG = 4
|
||||||
SP_LDT = 8
|
SP_LDT = 8
|
||||||
@@ -81,14 +88,10 @@ SP_RDT = 20
|
|||||||
bic LMAP, g, #255
|
bic LMAP, g, #255
|
||||||
add g, dgdx
|
add g, dgdx
|
||||||
|
|
||||||
and indexA, t, #0xFF00
|
tex indexA, t
|
||||||
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
|
|
||||||
ldrb indexA, [TILE, indexA]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
and indexB, t, #0xFF00
|
tex indexB, t
|
||||||
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
|
|
||||||
ldrb indexB, [TILE, indexB]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
||||||
@@ -104,7 +107,7 @@ SP_RDT = 20
|
|||||||
|
|
||||||
.global rasterizeGTA_mode4_asm
|
.global rasterizeGTA_mode4_asm
|
||||||
rasterizeGTA_mode4_asm:
|
rasterizeGTA_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||||
|
|
||||||
mov Lh, #0 // Lh = 0
|
mov Lh, #0 // Lh = 0
|
||||||
@@ -339,20 +342,14 @@ rasterizeGTA_mode4_asm:
|
|||||||
.scanline_end:
|
.scanline_end:
|
||||||
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_LDX + 16)]
|
add tmp, sp, #16
|
||||||
add Lx, tmp // Lx += Ldx from stack
|
ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_LDG + 16)]
|
add Lx, sLdx
|
||||||
add Lg, tmp // Lg += Ldg from stack
|
add Lg, sLdg
|
||||||
|
add Lt, sLdt
|
||||||
ldr tmp, [sp, #(SP_LDT + 16)]
|
add Rx, sRdx
|
||||||
add Lt, tmp // Lt += Ldt from stack
|
add Rg, sRdg
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDX + 16)]
|
|
||||||
add Rx, tmp // Rx += Rdx from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDG + 16)]
|
|
||||||
add Rg, tmp // Rg += Rdg from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
ldr tmp, [sp, #(SP_RDT + 16)]
|
||||||
add Rt, tmp // Rt += Rdt from stack
|
add Rt, tmp // Rt += Rdt from stack
|
||||||
@@ -367,4 +364,4 @@ rasterizeGTA_mode4_asm:
|
|||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -70,6 +70,13 @@ Rdv .req N
|
|||||||
Rti .req tmp
|
Rti .req tmp
|
||||||
Rgi .req tmp
|
Rgi .req tmp
|
||||||
|
|
||||||
|
sLdx .req L
|
||||||
|
sLdg .req R
|
||||||
|
sLdt .req Lh
|
||||||
|
sRdx .req Rh
|
||||||
|
sRdg .req tmp
|
||||||
|
sRdt .req N // not used in ldm due h collision
|
||||||
|
|
||||||
SP_LDX = 0
|
SP_LDX = 0
|
||||||
SP_LDG = 4
|
SP_LDG = 4
|
||||||
SP_LDT = 8
|
SP_LDT = 8
|
||||||
@@ -81,18 +88,15 @@ SP_RDT = 20
|
|||||||
bic LMAP, g, #255
|
bic LMAP, g, #255
|
||||||
add g, dgdx
|
add g, dgdx
|
||||||
|
|
||||||
and indexA, t, #0xFF00
|
tex indexA, t
|
||||||
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
|
lit indexA
|
||||||
ldrb indexA, [TILE, indexA]
|
|
||||||
ldrb indexA, [LMAP, indexA]
|
|
||||||
|
|
||||||
#ifndef TEX_2PX
|
#ifndef TEX_2PX
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
and indexB, t, #0xFF00
|
tex indexB, t
|
||||||
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
|
lit indexB
|
||||||
ldrb indexB, [TILE, indexB]
|
|
||||||
ldrb indexB, [LMAP, indexB]
|
|
||||||
add t, dtdx
|
add t, dtdx
|
||||||
|
|
||||||
orr indexA, indexB, lsl #8
|
orr indexA, indexB, lsl #8
|
||||||
@@ -101,13 +105,13 @@ SP_RDT = 20
|
|||||||
add t, dtdx, lsl #1
|
add t, dtdx, lsl #1
|
||||||
|
|
||||||
//orr indexA, indexA, lsl #8
|
//orr indexA, indexA, lsl #8
|
||||||
strb indexA, [ptr], #2
|
strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
|
||||||
#endif
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.global rasterizeGT_mode4_asm
|
.global rasterizeGT_mode4_asm
|
||||||
rasterizeGT_mode4_asm:
|
rasterizeGT_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||||
|
|
||||||
mov Lh, #0 // Lh = 0
|
mov Lh, #0 // Lh = 0
|
||||||
@@ -330,20 +334,14 @@ rasterizeGT_mode4_asm:
|
|||||||
.scanline_end:
|
.scanline_end:
|
||||||
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_LDX + 16)]
|
add tmp, sp, #16
|
||||||
add Lx, tmp // Lx += Ldx from stack
|
ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_LDG + 16)]
|
add Lx, sLdx
|
||||||
add Lg, tmp // Lg += Ldg from stack
|
add Lg, sLdg
|
||||||
|
add Lt, sLdt
|
||||||
ldr tmp, [sp, #(SP_LDT + 16)]
|
add Rx, sRdx
|
||||||
add Lt, tmp // Lt += Ldt from stack
|
add Rg, sRdg
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDX + 16)]
|
|
||||||
add Rx, tmp // Rx += Rdx from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDG + 16)]
|
|
||||||
add Rg, tmp // Rg += Rdg from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
ldr tmp, [sp, #(SP_RDT + 16)]
|
||||||
add Rt, tmp // Rt += Rdt from stack
|
add Rt, tmp // Rt += Rdt from stack
|
||||||
@@ -358,4 +356,4 @@ rasterizeGT_mode4_asm:
|
|||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -9,11 +9,11 @@ tmp .req r5
|
|||||||
N .req r6
|
N .req r6
|
||||||
Lh .req r7
|
Lh .req r7
|
||||||
Rh .req r8
|
Rh .req r8
|
||||||
Lx .req ip
|
Lx .req r9
|
||||||
Rx .req lr
|
Rx .req r10
|
||||||
Lg .req r9
|
Lg .req r11
|
||||||
Rg .req r10
|
Rg .req r12
|
||||||
h .req r11
|
h .req lr
|
||||||
Ldx .req h
|
Ldx .req h
|
||||||
Rdx .req Ldx
|
Rdx .req Ldx
|
||||||
Ldg .req Ldx
|
Ldg .req Ldx
|
||||||
@@ -29,6 +29,11 @@ width .req Rh
|
|||||||
g .req L
|
g .req L
|
||||||
dgdx .req R
|
dgdx .req R
|
||||||
|
|
||||||
|
sLdx .req L
|
||||||
|
sLdg .req R
|
||||||
|
sRdx .req Lh
|
||||||
|
sRdg .req Rh
|
||||||
|
|
||||||
SP_LDX = 0
|
SP_LDX = 0
|
||||||
SP_LDG = 4
|
SP_LDG = 4
|
||||||
SP_RDX = 8
|
SP_RDX = 8
|
||||||
@@ -44,7 +49,7 @@ SP_RDG = 12
|
|||||||
|
|
||||||
.global rasterizeG_mode4_asm
|
.global rasterizeG_mode4_asm
|
||||||
rasterizeG_mode4_asm:
|
rasterizeG_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
sub sp, #16 // reserve stack space for [Ldx, Ldg, Rdx, Rdg]
|
sub sp, #16 // reserve stack space for [Ldx, Ldg, Rdx, Rdg]
|
||||||
|
|
||||||
mov tmp, #LMAP_ADDR
|
mov tmp, #LMAP_ADDR
|
||||||
@@ -188,17 +193,12 @@ rasterizeG_mode4_asm:
|
|||||||
bne .scanline_block_4px
|
bne .scanline_block_4px
|
||||||
|
|
||||||
.scanline_end:
|
.scanline_end:
|
||||||
ldr tmp, [sp, #(SP_LDX + 16)]
|
add tmp, sp, #16
|
||||||
add Lx, tmp // Lx += Ldx from stack
|
ldmia tmp, {sLdx, sLdg, sRdx, sRdg}
|
||||||
|
add Lx, sLdx
|
||||||
ldr tmp, [sp, #(SP_LDG + 16)]
|
add Lg, sLdg
|
||||||
add Lg, tmp // Lg += Ldg from stack
|
add Rx, sRdx
|
||||||
|
add Rg, sRdg
|
||||||
ldr tmp, [sp, #(SP_RDX + 16)]
|
|
||||||
add Rx, tmp // Rx += Rdx from stack
|
|
||||||
|
|
||||||
ldr tmp, [sp, #(SP_RDG + 16)]
|
|
||||||
add Rg, tmp // Rg += Rdg from stack
|
|
||||||
|
|
||||||
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
|
||||||
|
|
||||||
@@ -210,4 +210,4 @@ rasterizeG_mode4_asm:
|
|||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
add sp, #16 // revert reserved space for [Ldx, Ldg, Rdx, Rdg]
|
add sp, #16 // revert reserved space for [Ldx, Ldg, Rdx, Rdg]
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -6,14 +6,14 @@ R .req r2
|
|||||||
LMAP .req r3
|
LMAP .req r3
|
||||||
Lh .req r4
|
Lh .req r4
|
||||||
Rh .req r5
|
Rh .req r5
|
||||||
Lx .req ip
|
Lx .req r6
|
||||||
Rx .req lr
|
Rx .req r7
|
||||||
Ldx .req r6
|
Ldx .req r8
|
||||||
Rdx .req r7
|
Rdx .req r9
|
||||||
N .req r8
|
N .req r10
|
||||||
tmp .req r9
|
tmp .req r11
|
||||||
DIVLUT .req r10
|
DIVLUT .req r12
|
||||||
width .req r11
|
width .req lr
|
||||||
h .req N
|
h .req N
|
||||||
Ry1 .req tmp
|
Ry1 .req tmp
|
||||||
Ry2 .req Rh
|
Ry2 .req Rh
|
||||||
@@ -28,7 +28,7 @@ indexB .req DIVLUT
|
|||||||
|
|
||||||
.global rasterizeS_mode4_asm
|
.global rasterizeS_mode4_asm
|
||||||
rasterizeS_mode4_asm:
|
rasterizeS_mode4_asm:
|
||||||
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
|
|
||||||
ldr LMAP, .shadow_lightmap
|
ldr LMAP, .shadow_lightmap
|
||||||
|
|
||||||
@@ -149,4 +149,4 @@ rasterizeS_mode4_asm:
|
|||||||
b .loop
|
b .loop
|
||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc}
|
ldmfd sp!, {r4-r11, pc}
|
@@ -71,7 +71,7 @@ transformMesh_asm:
|
|||||||
mla y, mx, vx, y
|
mla y, mx, vx, y
|
||||||
mla y, my, vy, y
|
mla y, my, vy, y
|
||||||
mla y, mz, vz, y
|
mla y, mz, vz, y
|
||||||
mov y, y, asr #(FIXED_SHIFT - PROJ_SHIFT)
|
mov y, y, asr #FIXED_SHIFT
|
||||||
|
|
||||||
// transform z
|
// transform z
|
||||||
ldmia m!, {mx, my, mz, z}
|
ldmia m!, {mx, my, mz, z}
|
||||||
@@ -99,21 +99,23 @@ transformMesh_asm:
|
|||||||
mul x, dz, x
|
mul x, dz, x
|
||||||
mul y, dz, y
|
mul y, dz, y
|
||||||
mov x, x, asr #(16 - PROJ_SHIFT)
|
mov x, x, asr #(16 - PROJ_SHIFT)
|
||||||
// keep y shifted by 16 for min/max cmp
|
mov y, y, asr #(16 - PROJ_SHIFT)
|
||||||
|
|
||||||
// viewport clipping
|
// viewport clipping
|
||||||
ldmia sp, {minXY, maxXY}
|
ldmia sp, {minXY, maxXY}
|
||||||
|
|
||||||
cmp x, minXY, asr #16
|
cmp x, minXY, asr #16
|
||||||
orrle vg, vg, #CLIP_LEFT
|
orrle vg, vg, #CLIP_LEFT
|
||||||
cmp y, minXY, lsl #16
|
|
||||||
orrle vg, vg, #CLIP_TOP
|
|
||||||
cmp x, maxXY, asr #16
|
cmp x, maxXY, asr #16
|
||||||
orrge vg, vg, #CLIP_RIGHT
|
orrge vg, vg, #CLIP_RIGHT
|
||||||
cmp y, maxXY, lsl #16
|
|
||||||
orrge vg, vg, #CLIP_BOTTOM
|
|
||||||
|
|
||||||
mov y, y, asr #16
|
mov minXY, minXY, lsl #16
|
||||||
|
mov maxXY, maxXY, lsl #16
|
||||||
|
|
||||||
|
cmp y, minXY, asr #16
|
||||||
|
orrle vg, vg, #CLIP_TOP
|
||||||
|
cmp y, maxXY, asr #16
|
||||||
|
orrge vg, vg, #CLIP_BOTTOM
|
||||||
|
|
||||||
add x, x, #(FRAME_WIDTH >> 1)
|
add x, x, #(FRAME_WIDTH >> 1)
|
||||||
add y, y, #(FRAME_HEIGHT >> 1)
|
add y, y, #(FRAME_HEIGHT >> 1)
|
||||||
|
@@ -85,7 +85,7 @@ transformRoom_asm:
|
|||||||
mla y, mx, vx, y
|
mla y, mx, vx, y
|
||||||
mla y, my, vy, y
|
mla y, my, vy, y
|
||||||
mla y, mz, vz, y
|
mla y, mz, vz, y
|
||||||
mov y, y, asr #(FIXED_SHIFT - PROJ_SHIFT)
|
mov y, y, asr #FIXED_SHIFT
|
||||||
|
|
||||||
// transform x
|
// transform x
|
||||||
ldmdb m!, {mx, my, mz, x}
|
ldmdb m!, {mx, my, mz, x}
|
||||||
@@ -121,21 +121,23 @@ transformRoom_asm:
|
|||||||
mul x, dz, x
|
mul x, dz, x
|
||||||
mul y, dz, y
|
mul y, dz, y
|
||||||
mov x, x, asr #(16 - PROJ_SHIFT)
|
mov x, x, asr #(16 - PROJ_SHIFT)
|
||||||
// keep y shifted by 16 for min/max cmp
|
mov y, y, asr #(16 - PROJ_SHIFT)
|
||||||
|
|
||||||
// viewport clipping
|
// viewport clipping
|
||||||
ldmia sp, {m, minXY, maxXY} // preload matrix
|
ldmia sp, {m, minXY, maxXY} // preload matrix
|
||||||
|
|
||||||
cmp x, minXY, asr #16
|
cmp x, minXY, asr #16
|
||||||
orrle vg, vg, #CLIP_LEFT
|
orrle vg, vg, #CLIP_LEFT
|
||||||
cmp y, minXY, lsl #16
|
|
||||||
orrle vg, vg, #CLIP_TOP
|
|
||||||
cmp x, maxXY, asr #16
|
cmp x, maxXY, asr #16
|
||||||
orrge vg, vg, #CLIP_RIGHT
|
orrge vg, vg, #CLIP_RIGHT
|
||||||
cmp y, maxXY, lsl #16
|
|
||||||
orrge vg, vg, #CLIP_BOTTOM
|
|
||||||
|
|
||||||
mov y, y, asr #16
|
mov minXY, minXY, lsl #16
|
||||||
|
mov maxXY, maxXY, lsl #16
|
||||||
|
|
||||||
|
cmp y, minXY, asr #16
|
||||||
|
orrle vg, vg, #CLIP_TOP
|
||||||
|
cmp y, maxXY, asr #16
|
||||||
|
orrge vg, vg, #CLIP_BOTTOM
|
||||||
|
|
||||||
add x, x, #(FRAME_WIDTH >> 1)
|
add x, x, #(FRAME_WIDTH >> 1)
|
||||||
add y, y, #(FRAME_HEIGHT >> 1)
|
add y, y, #(FRAME_HEIGHT >> 1)
|
||||||
|
Reference in New Issue
Block a user