1
0
mirror of https://github.com/XProger/OpenLara.git synced 2025-08-08 14:16:52 +02:00

#368 GBA rasterizer optimization

This commit is contained in:
XProger
2021-12-30 09:56:40 +03:00
parent e7cb40706e
commit 9bcc8468d0
10 changed files with 154 additions and 152 deletions

View File

@@ -71,4 +71,14 @@ MAX_INT32 = 0x7FFFFFFF
sub vy2, vy2, vy0 sub vy2, vy2, vy0
mlas tmp, vx1, vy2, tmp mlas tmp, vx1, vy2, tmp
bgt \skip bgt \skip
.endm .endm
// tex: fetch one texel byte through the TILE base register.
//   index - out: scratch register; receives the byte read from TILE
//   uv    - in:  packed texture coordinate; only bits 15:8 and 31:24 are read
// \index is written before \uv is read for the last time, so the two arguments
// must be different registers. TILE has to be a register alias that is in
// scope wherever the macro is expanded.
.macro tex index, uv
    and     \index, \uv, #0xFF00            // keep bits 15:8 of uv (the "v * 256" part)
    orr     \index, \index, \uv, lsr #24    // low byte = top byte of uv -> index = t.v * 256 + t.u
    ldrb    \index, [TILE, \index]          // index = byte at TILE + index
.endm
// lit: replace \index with the byte stored at LMAP + \index.
//   index - in/out: byte offset into the table on entry, looked-up byte on exit
// LMAP must be a register alias in scope at the expansion site
// (presumably the lighting/lightmap table base - confirm against the caller).
.macro lit index
ldrb \index, [LMAP, \index]
.endm

View File

@@ -11,11 +11,11 @@ N .req r6
Lh .req r7 Lh .req r7
Rh .req r8 Rh .req r8
Lx .req ip Lx .req r9
Rx .req lr Rx .req r10
Lt .req r9 Lt .req r11
Rt .req r10 Rt .req r12
h .req r11 h .req lr
Ldx .req h Ldx .req h
Rdx .req h Rdx .req h
@@ -49,20 +49,21 @@ Rduv .req h
Rdu .req N Rdu .req N
Rdv .req h Rdv .req h
// Scratch aliases that receive the per-scanline deltas reloaded from the stack
// at .scanline_end: a single ldmia fills all four, then they are added to
// Lx, Lt, Rx and Rt respectively.
// NOTE(review): LDM transfers registers in ascending register-number order, not
// in list order, so these aliases must resolve to ascending registers that
// match the SP_LDX / SP_LDT / SP_RDX / SP_RDT stack layout - confirm against
// the .req table at the top of the file.
sLdx .req tmp // also the ldmia base register; overwritten by the load
sLdt .req N
sRdx .req Lh
sRdt .req Rh
SP_LDX = 0 SP_LDX = 0
SP_LDT = 4 SP_LDT = 4
SP_RDX = 8 SP_RDX = 8
SP_RDT = 12 SP_RDT = 12
.macro PUT_PIXELS .macro PUT_PIXELS
and indexA, t, #0xFF00 tex indexA, t
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
ldrb indexA, [TILE, indexA]
add t, dtdx add t, dtdx
and indexB, t, #0xFF00 tex indexB, t
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
ldrb indexB, [TILE, indexB]
add t, dtdx add t, dtdx
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent // cheap non-accurate alpha test, skip pixels pair if one or both are transparent
@@ -78,7 +79,7 @@ SP_RDT = 12
.global rasterizeFTA_mode4_asm .global rasterizeFTA_mode4_asm
rasterizeFTA_mode4_asm: rasterizeFTA_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt] sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
mov LMAP, #LMAP_ADDR mov LMAP, #LMAP_ADDR
@@ -273,17 +274,12 @@ rasterizeFTA_mode4_asm:
bne .scanline_block_8px bne .scanline_block_8px
.scanline_end: .scanline_end:
ldr tmp, [sp, #(SP_LDX + 16)] add tmp, sp, #16
add Lx, tmp // Lx += Ldx from stack ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
add Lx, sLdx
ldr tmp, [sp, #(SP_LDT + 16)] add Lt, sLdt
add Lt, tmp // Lt += Ldt from stack add Rx, sRdx
add Rt, sRdt
ldr tmp, [sp, #(SP_RDX + 16)]
add Rx, tmp // Rx += Rdx from stack
ldr tmp, [sp, #(SP_RDT + 16)]
add Rt, tmp // Rt += Rdt from stack
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240) add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
@@ -295,4 +291,4 @@ rasterizeFTA_mode4_asm:
.exit: .exit:
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt] add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -11,11 +11,11 @@ N .req r6
Lh .req r7 Lh .req r7
Rh .req r8 Rh .req r8
Lx .req ip Lx .req r9
Rx .req lr Rx .req r10
Lt .req r9 Lt .req r11
Rt .req r10 Rt .req r12
h .req r11 h .req lr
Ldx .req h Ldx .req h
Rdx .req h Rdx .req h
@@ -49,39 +49,40 @@ Rduv .req h
Rdu .req N Rdu .req N
Rdv .req h Rdv .req h
// Scratch aliases that receive the per-scanline deltas reloaded from the stack
// at .scanline_end: a single ldmia fills all four, then they are added to
// Lx, Lt, Rx and Rt respectively.
// NOTE(review): LDM transfers registers in ascending register-number order, not
// in list order, so these aliases must resolve to ascending registers that
// match the SP_LDX / SP_LDT / SP_RDX / SP_RDT stack layout - confirm against
// the .req table at the top of the file.
sLdx .req tmp // also the ldmia base register; overwritten by the load
sLdt .req N
sRdx .req Lh
sRdt .req Rh
SP_LDX = 0 SP_LDX = 0
SP_LDT = 4 SP_LDT = 4
SP_RDX = 8 SP_RDX = 8
SP_RDT = 12 SP_RDT = 12
.macro PUT_PIXELS .macro PUT_PIXELS
and indexA, t, #0xFF00 tex indexA, t
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u lit indexA
ldrb indexA, [TILE, indexA]
ldrb indexA, [LMAP, indexA]
#ifndef TEX_2PX #ifndef TEX_2PX
add t, dtdx add t, dtdx
and indexB, t, #0xFF00 tex indexB, t
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u lit indexB
ldrb indexB, [TILE, indexB]
ldrb indexB, [LMAP, indexB]
add t, dtdx add t, dtdx
orr indexA, indexB, lsl #8 orr indexA, indexB, lsl #8
strh indexA, [ptr], #2
#else #else
add t, dtdx, lsl #1 add t, dtdx, lsl #1
//orr indexA, indexA, lsl #8 //orr indexA, indexA, lsl #8
strb indexA, [tmp], #2 // writing a byte to GBA VRAM will write a half word for free
#endif #endif
strb indexA, [tmp], #2
.endm .endm
.global rasterizeFT_mode4_asm .global rasterizeFT_mode4_asm
rasterizeFT_mode4_asm: rasterizeFT_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt] sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
mov LMAP, #LMAP_ADDR mov LMAP, #LMAP_ADDR
@@ -273,26 +274,21 @@ rasterizeFT_mode4_asm:
bne .scanline_block_8px bne .scanline_block_8px
.scanline_end: .scanline_end:
ldr tmp, [sp, #(SP_LDX + 16)] add tmp, sp, #16
add Lx, tmp // Lx += Ldx from stack ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
add Lx, sLdx
add Lt, sLdt
add Rx, sRdx
add Rt, sRdt
ldr tmp, [sp, #(SP_LDT + 16)] add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
add Lt, tmp // Lt += Ldt from stack
ldr tmp, [sp, #(SP_RDX + 16)]
add Rx, tmp // Rx += Rdx from stack
ldr tmp, [sp, #(SP_RDT + 16)]
add Rt, tmp // Rt += Rdt from stack
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
subs h, #1 subs h, #1
bne .scanline_start bne .scanline_start
ldmfd sp!, {L,R,Lh,Rh} // sp+16 ldmfd sp!, {L,R,Lh,Rh} // sp+16
b .loop b .loop
.exit: .exit:
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt] add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -6,14 +6,15 @@ R .req r2
index .req r3 index .req r3
Lh .req r4 Lh .req r4
Rh .req r5 Rh .req r5
Lx .req ip Lx .req r6
Rx .req lr Rx .req r7
Ldx .req r6 Ldx .req r8
Rdx .req r7 Rdx .req r9
N .req r8 N .req r10
tmp .req r9 tmp .req r11
DIVLUT .req r10 DIVLUT .req r12
width .req r11 width .req lr
h .req N h .req N
Ry1 .req tmp Ry1 .req tmp
Ry2 .req Rh Ry2 .req Rh
@@ -25,7 +26,7 @@ blocks .req DIVLUT
.global rasterizeF_mode4_asm .global rasterizeF_mode4_asm
rasterizeF_mode4_asm: rasterizeF_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
mov LMAP, #LMAP_ADDR mov LMAP, #LMAP_ADDR
@@ -136,4 +137,4 @@ rasterizeF_mode4_asm:
b .loop b .loop
.exit: .exit:
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -70,6 +70,13 @@ Rdv .req N
Rti .req tmp Rti .req tmp
Rgi .req tmp Rgi .req tmp
// Scratch aliases for the per-scanline deltas reloaded from the stack at
// .scanline_end. The first five are filled by one ldmia and added to
// Lx, Lg, Lt, Rx and Rg; Rdt is still fetched with a separate ldr into tmp.
// NOTE(review): LDM transfers registers in ascending register-number order, not
// in list order, so L, R, Lh, Rh, tmp must be ascending registers for the
// values to land in the intended aliases - confirm against the .req table.
sLdx .req L
sLdg .req R
sLdt .req Lh
sRdx .req Rh
sRdg .req tmp // also the ldmia base register; overwritten by the load
sRdt .req N // not used in the ldm due to the collision with h
SP_LDX = 0 SP_LDX = 0
SP_LDG = 4 SP_LDG = 4
SP_LDT = 8 SP_LDT = 8
@@ -81,14 +88,10 @@ SP_RDT = 20
bic LMAP, g, #255 bic LMAP, g, #255
add g, dgdx add g, dgdx
and indexA, t, #0xFF00 tex indexA, t
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u
ldrb indexA, [TILE, indexA]
add t, dtdx add t, dtdx
and indexB, t, #0xFF00 tex indexB, t
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u
ldrb indexB, [TILE, indexB]
add t, dtdx add t, dtdx
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent // cheap non-accurate alpha test, skip pixels pair if one or both are transparent
@@ -104,7 +107,7 @@ SP_RDT = 20
.global rasterizeGTA_mode4_asm .global rasterizeGTA_mode4_asm
rasterizeGTA_mode4_asm: rasterizeGTA_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt] sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
mov Lh, #0 // Lh = 0 mov Lh, #0 // Lh = 0
@@ -339,20 +342,14 @@ rasterizeGTA_mode4_asm:
.scanline_end: .scanline_end:
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24 ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
ldr tmp, [sp, #(SP_LDX + 16)] add tmp, sp, #16
add Lx, tmp // Lx += Ldx from stack ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
ldr tmp, [sp, #(SP_LDG + 16)] add Lx, sLdx
add Lg, tmp // Lg += Ldg from stack add Lg, sLdg
add Lt, sLdt
ldr tmp, [sp, #(SP_LDT + 16)] add Rx, sRdx
add Lt, tmp // Lt += Ldt from stack add Rg, sRdg
ldr tmp, [sp, #(SP_RDX + 16)]
add Rx, tmp // Rx += Rdx from stack
ldr tmp, [sp, #(SP_RDG + 16)]
add Rg, tmp // Rg += Rdg from stack
ldr tmp, [sp, #(SP_RDT + 16)] ldr tmp, [sp, #(SP_RDT + 16)]
add Rt, tmp // Rt += Rdt from stack add Rt, tmp // Rt += Rdt from stack
@@ -367,4 +364,4 @@ rasterizeGTA_mode4_asm:
.exit: .exit:
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt] add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -70,6 +70,13 @@ Rdv .req N
Rti .req tmp Rti .req tmp
Rgi .req tmp Rgi .req tmp
// Scratch aliases for the per-scanline deltas reloaded from the stack at
// .scanline_end. The first five are filled by one ldmia and added to
// Lx, Lg, Lt, Rx and Rg; Rdt is still fetched with a separate ldr into tmp.
// NOTE(review): LDM transfers registers in ascending register-number order, not
// in list order, so L, R, Lh, Rh, tmp must be ascending registers for the
// values to land in the intended aliases - confirm against the .req table.
sLdx .req L
sLdg .req R
sLdt .req Lh
sRdx .req Rh
sRdg .req tmp // also the ldmia base register; overwritten by the load
sRdt .req N // not used in the ldm due to the collision with h
SP_LDX = 0 SP_LDX = 0
SP_LDG = 4 SP_LDG = 4
SP_LDT = 8 SP_LDT = 8
@@ -81,18 +88,15 @@ SP_RDT = 20
bic LMAP, g, #255 bic LMAP, g, #255
add g, dgdx add g, dgdx
and indexA, t, #0xFF00 tex indexA, t
orr indexA, t, lsr #24 // indexA = t.v * 256 + t.u lit indexA
ldrb indexA, [TILE, indexA]
ldrb indexA, [LMAP, indexA]
#ifndef TEX_2PX #ifndef TEX_2PX
add t, dtdx add t, dtdx
and indexB, t, #0xFF00 tex indexB, t
orr indexB, t, lsr #24 // indexB = t.v * 256 + t.u lit indexB
ldrb indexB, [TILE, indexB]
ldrb indexB, [LMAP, indexB]
add t, dtdx add t, dtdx
orr indexA, indexB, lsl #8 orr indexA, indexB, lsl #8
@@ -101,13 +105,13 @@ SP_RDT = 20
add t, dtdx, lsl #1 add t, dtdx, lsl #1
//orr indexA, indexA, lsl #8 //orr indexA, indexA, lsl #8
strb indexA, [ptr], #2 strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
#endif #endif
.endm .endm
.global rasterizeGT_mode4_asm .global rasterizeGT_mode4_asm
rasterizeGT_mode4_asm: rasterizeGT_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt] sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
mov Lh, #0 // Lh = 0 mov Lh, #0 // Lh = 0
@@ -330,20 +334,14 @@ rasterizeGT_mode4_asm:
.scanline_end: .scanline_end:
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24 ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
ldr tmp, [sp, #(SP_LDX + 16)] add tmp, sp, #16
add Lx, tmp // Lx += Ldx from stack ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
ldr tmp, [sp, #(SP_LDG + 16)] add Lx, sLdx
add Lg, tmp // Lg += Ldg from stack add Lg, sLdg
add Lt, sLdt
ldr tmp, [sp, #(SP_LDT + 16)] add Rx, sRdx
add Lt, tmp // Lt += Ldt from stack add Rg, sRdg
ldr tmp, [sp, #(SP_RDX + 16)]
add Rx, tmp // Rx += Rdx from stack
ldr tmp, [sp, #(SP_RDG + 16)]
add Rg, tmp // Rg += Rdg from stack
ldr tmp, [sp, #(SP_RDT + 16)] ldr tmp, [sp, #(SP_RDT + 16)]
add Rt, tmp // Rt += Rdt from stack add Rt, tmp // Rt += Rdt from stack
@@ -358,4 +356,4 @@ rasterizeGT_mode4_asm:
.exit: .exit:
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt] add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -9,11 +9,11 @@ tmp .req r5
N .req r6 N .req r6
Lh .req r7 Lh .req r7
Rh .req r8 Rh .req r8
Lx .req ip Lx .req r9
Rx .req lr Rx .req r10
Lg .req r9 Lg .req r11
Rg .req r10 Rg .req r12
h .req r11 h .req lr
Ldx .req h Ldx .req h
Rdx .req Ldx Rdx .req Ldx
Ldg .req Ldx Ldg .req Ldx
@@ -29,6 +29,11 @@ width .req Rh
g .req L g .req L
dgdx .req R dgdx .req R
// Scratch aliases that receive the per-scanline deltas reloaded from the stack
// at .scanline_end: a single ldmia fills all four, then they are added to
// Lx, Lg, Rx and Rg respectively.
// NOTE(review): LDM transfers registers in ascending register-number order, not
// in list order, so L, R, Lh, Rh must be ascending registers matching the
// SP_LDX / SP_LDG / SP_RDX / SP_RDG stack layout - confirm against the .req table.
sLdx .req L
sLdg .req R
sRdx .req Lh
sRdg .req Rh
SP_LDX = 0 SP_LDX = 0
SP_LDG = 4 SP_LDG = 4
SP_RDX = 8 SP_RDX = 8
@@ -44,7 +49,7 @@ SP_RDG = 12
.global rasterizeG_mode4_asm .global rasterizeG_mode4_asm
rasterizeG_mode4_asm: rasterizeG_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
sub sp, #16 // reserve stack space for [Ldx, Ldg, Rdx, Rdg] sub sp, #16 // reserve stack space for [Ldx, Ldg, Rdx, Rdg]
mov tmp, #LMAP_ADDR mov tmp, #LMAP_ADDR
@@ -188,17 +193,12 @@ rasterizeG_mode4_asm:
bne .scanline_block_4px bne .scanline_block_4px
.scanline_end: .scanline_end:
ldr tmp, [sp, #(SP_LDX + 16)] add tmp, sp, #16
add Lx, tmp // Lx += Ldx from stack ldmia tmp, {sLdx, sLdg, sRdx, sRdg}
add Lx, sLdx
ldr tmp, [sp, #(SP_LDG + 16)] add Lg, sLdg
add Lg, tmp // Lg += Ldg from stack add Rx, sRdx
add Rg, sRdg
ldr tmp, [sp, #(SP_RDX + 16)]
add Rx, tmp // Rx += Rdx from stack
ldr tmp, [sp, #(SP_RDG + 16)]
add Rg, tmp // Rg += Rdg from stack
add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240) add pixel, #VRAM_STRIDE // pixel += FRAME_WIDTH (240)
@@ -210,4 +210,4 @@ rasterizeG_mode4_asm:
.exit: .exit:
add sp, #16 // revert reserved space for [Ldx, Ldg, Rdx, Rdg] add sp, #16 // revert reserved space for [Ldx, Ldg, Rdx, Rdg]
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -6,14 +6,14 @@ R .req r2
LMAP .req r3 LMAP .req r3
Lh .req r4 Lh .req r4
Rh .req r5 Rh .req r5
Lx .req ip Lx .req r6
Rx .req lr Rx .req r7
Ldx .req r6 Ldx .req r8
Rdx .req r7 Rdx .req r9
N .req r8 N .req r10
tmp .req r9 tmp .req r11
DIVLUT .req r10 DIVLUT .req r12
width .req r11 width .req lr
h .req N h .req N
Ry1 .req tmp Ry1 .req tmp
Ry2 .req Rh Ry2 .req Rh
@@ -28,7 +28,7 @@ indexB .req DIVLUT
.global rasterizeS_mode4_asm .global rasterizeS_mode4_asm
rasterizeS_mode4_asm: rasterizeS_mode4_asm:
stmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} stmfd sp!, {r4-r11, lr}
ldr LMAP, .shadow_lightmap ldr LMAP, .shadow_lightmap
@@ -149,4 +149,4 @@ rasterizeS_mode4_asm:
b .loop b .loop
.exit: .exit:
ldmfd sp!, {r4,r5,r6,r7,r8,r9,r10,r11,pc} ldmfd sp!, {r4-r11, pc}

View File

@@ -71,7 +71,7 @@ transformMesh_asm:
mla y, mx, vx, y mla y, mx, vx, y
mla y, my, vy, y mla y, my, vy, y
mla y, mz, vz, y mla y, mz, vz, y
mov y, y, asr #(FIXED_SHIFT - PROJ_SHIFT) mov y, y, asr #FIXED_SHIFT
// transform z // transform z
ldmia m!, {mx, my, mz, z} ldmia m!, {mx, my, mz, z}
@@ -99,21 +99,23 @@ transformMesh_asm:
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)
// keep y shifted by 16 for min/max cmp mov y, y, asr #(16 - PROJ_SHIFT)
// viewport clipping // viewport clipping
ldmia sp, {minXY, maxXY} ldmia sp, {minXY, maxXY}
cmp x, minXY, asr #16 cmp x, minXY, asr #16
orrle vg, vg, #CLIP_LEFT orrle vg, vg, #CLIP_LEFT
cmp y, minXY, lsl #16
orrle vg, vg, #CLIP_TOP
cmp x, maxXY, asr #16 cmp x, maxXY, asr #16
orrge vg, vg, #CLIP_RIGHT orrge vg, vg, #CLIP_RIGHT
cmp y, maxXY, lsl #16
orrge vg, vg, #CLIP_BOTTOM
mov y, y, asr #16 mov minXY, minXY, lsl #16
mov maxXY, maxXY, lsl #16
cmp y, minXY, asr #16
orrle vg, vg, #CLIP_TOP
cmp y, maxXY, asr #16
orrge vg, vg, #CLIP_BOTTOM
add x, x, #(FRAME_WIDTH >> 1) add x, x, #(FRAME_WIDTH >> 1)
add y, y, #(FRAME_HEIGHT >> 1) add y, y, #(FRAME_HEIGHT >> 1)

View File

@@ -85,7 +85,7 @@ transformRoom_asm:
mla y, mx, vx, y mla y, mx, vx, y
mla y, my, vy, y mla y, my, vy, y
mla y, mz, vz, y mla y, mz, vz, y
mov y, y, asr #(FIXED_SHIFT - PROJ_SHIFT) mov y, y, asr #FIXED_SHIFT
// transform x // transform x
ldmdb m!, {mx, my, mz, x} ldmdb m!, {mx, my, mz, x}
@@ -121,21 +121,23 @@ transformRoom_asm:
mul x, dz, x mul x, dz, x
mul y, dz, y mul y, dz, y
mov x, x, asr #(16 - PROJ_SHIFT) mov x, x, asr #(16 - PROJ_SHIFT)
// keep y shifted by 16 for min/max cmp mov y, y, asr #(16 - PROJ_SHIFT)
// viewport clipping // viewport clipping
ldmia sp, {m, minXY, maxXY} // preload matrix ldmia sp, {m, minXY, maxXY} // preload matrix
cmp x, minXY, asr #16 cmp x, minXY, asr #16
orrle vg, vg, #CLIP_LEFT orrle vg, vg, #CLIP_LEFT
cmp y, minXY, lsl #16
orrle vg, vg, #CLIP_TOP
cmp x, maxXY, asr #16 cmp x, maxXY, asr #16
orrge vg, vg, #CLIP_RIGHT orrge vg, vg, #CLIP_RIGHT
cmp y, maxXY, lsl #16
orrge vg, vg, #CLIP_BOTTOM
mov y, y, asr #16 mov minXY, minXY, lsl #16
mov maxXY, maxXY, lsl #16
cmp y, minXY, asr #16
orrle vg, vg, #CLIP_TOP
cmp y, maxXY, asr #16
orrge vg, vg, #CLIP_BOTTOM
add x, x, #(FRAME_WIDTH >> 1) add x, x, #(FRAME_WIDTH >> 1)
add y, y, #(FRAME_HEIGHT >> 1) add y, y, #(FRAME_HEIGHT >> 1)