mirror of
https://github.com/XProger/OpenLara.git
synced 2025-03-13 23:59:41 +01:00
#368 micro optimizations of rasterizer, remove per-pixel texturing (unused)
This commit is contained in:
parent
401c854209
commit
b6df8a2348
@ -1,8 +1,6 @@
|
||||
.section .iwram
|
||||
.arm
|
||||
|
||||
#define TEX_2PX // lazy texturing, comment out for per-pixel
|
||||
|
||||
#define FRAME_WIDTH 240
|
||||
#define FRAME_HEIGHT 160
|
||||
|
||||
@ -45,9 +43,11 @@
|
||||
.equ CLIP_MASK_VP, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM)
|
||||
.equ CLIP_MASK, (CLIP_MASK_VP + CLIP_FAR + CLIP_NEAR)
|
||||
|
||||
.equ FACE_TEXTURE_BITS, 13
|
||||
.equ FACE_TRIANGLE, (1 << FACE_TEXTURE_BITS)
|
||||
.equ FACE_TEXTURE_BITS, 14
|
||||
.equ FACE_TEXTURE, ((1 << FACE_TEXTURE_BITS) - 1)
|
||||
.equ FACE_GOURAUD, (2 << FACE_TYPE_SHIFT)
|
||||
.equ FACE_CLIPPED, (1 << 18)
|
||||
.equ FACE_TRIANGLE, (1 << 19)
|
||||
|
||||
.equ FACE_FLAGS, 0
|
||||
.equ FACE_NEXT, 4
|
||||
@ -67,9 +67,6 @@
|
||||
.equ FACE_TYPE_LINE_H, (8 << FACE_TYPE_SHIFT)
|
||||
.equ FACE_TYPE_LINE_V, (9 << FACE_TYPE_SHIFT)
|
||||
|
||||
.equ FACE_GOURAUD, (2 << FACE_TYPE_SHIFT)
|
||||
.equ FACE_CLIPPED, (1 << 18)
|
||||
|
||||
.equ FIXED_SHIFT, 14
|
||||
.equ PROJ_SHIFT, 4
|
||||
.equ OT_SHIFT, 4
|
||||
@ -90,6 +87,7 @@
|
||||
|
||||
.equ MAX_CAUSTICS, 32
|
||||
.equ MAX_RAND_TABLE, 32
|
||||
.equ MAX_ANIM_TEX, 128
|
||||
|
||||
.equ MIN_INT32, 0x80000000
|
||||
.equ MAX_INT32, 0x7FFFFFFF
|
||||
@ -110,6 +108,17 @@
|
||||
ble \skip
|
||||
.endm
|
||||
|
||||
// scaleUV: scale both signed 16-bit halves of the packed pair \uv by \f and
// repack the upper 16 bits of each 32-bit product back into \uv.
//   uv - in/out: packed pair (one int16 in bits 31..16, one in bits 15..0)
//   u  - scratch, clobbered: product for the upper half
//   v  - scratch, clobbered: product for the lower half
//   f  - in: multiplier, only read by this macro
// \u must be a different register than \uv, because \uv is read again after
// \u has been written. \v may be the same register as \uv: the last read of
// \uv is the instruction that first writes \v.
.macro scaleUV uv, u, v, f
|
||||
asr \u, \uv, #16
|
||||
mul \u, \f // u = f * int16(uv >> 16)
|
||||
lsl \v, \uv, #16
|
||||
asr \v, #16
|
||||
mul \v, \f // v = f * int16(uv)
|
||||
lsr \u, #16
|
||||
lsl \u, #16
|
||||
orr \uv, \u, \v, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
|
||||
.endm
|
||||
|
||||
.macro tex index, uv
|
||||
and \index, \uv, #0xFF00
|
||||
orr \index, \uv, lsr #24 // index = v * 256 + u
|
||||
|
@ -89,6 +89,8 @@ faceAddMeshTriangles_asm:
|
||||
orr vp1, vp0, vp1, lsl #(16 - 3)
|
||||
mov vp2, vp2, lsr #3
|
||||
|
||||
orr flags, #FACE_TRIANGLE
|
||||
|
||||
ldr ot, =gOT
|
||||
ldr next, [ot, depth, lsl #2]
|
||||
str face, [ot, depth, lsl #2]
|
||||
|
@ -96,6 +96,8 @@ faceAddRoomTriangles_asm:
|
||||
orr vp1, vp0, vp1, lsl #(16 - 3)
|
||||
mov vp2, vp2, lsr #3
|
||||
|
||||
orr flags, #FACE_TRIANGLE
|
||||
|
||||
ldr ot, =gOT
|
||||
ldr next, [ot, depth, lsl #2]
|
||||
str face, [ot, depth, lsl #2]
|
||||
|
@ -25,6 +25,7 @@ verticesBase .req vZG
|
||||
facesBase .req vZG
|
||||
vertex .req vZG
|
||||
texture .req tmp
|
||||
texAnim .req vXY
|
||||
texIndex .req tmp
|
||||
texTile .req tmp
|
||||
sprite .req tmp
|
||||
@ -121,8 +122,12 @@ flush_asm:
|
||||
|
||||
.set_texture:
|
||||
mov texIndex, flags, lsl #(32 - FACE_TEXTURE_BITS)
|
||||
//cmp texIndex, #(MAX_ANIM_TEX << (32 - FACE_TEXTURE_BITS)) // TODO split to animated and static textures arrays
|
||||
add texIndex, texIndex, texIndex, lsl #1
|
||||
add texture, TEXTURES, texIndex, lsr #(32 - FACE_TEXTURE_BITS - 2)
|
||||
//addge texture, TEXTURES, texIndex, lsr #(32 - FACE_TEXTURE_BITS - 2)
|
||||
//ldrlt texAnim, =gAnimTextures
|
||||
//addlt texture, texAnim, texIndex, lsr #(32 - FACE_TEXTURE_BITS - 2)
|
||||
|
||||
ldmia texture, {texTile, uv01, uv23}
|
||||
str texTile, [TILE]
|
||||
|
@ -1,11 +1,11 @@
|
||||
#include "common_asm.inc"
|
||||
|
||||
flags .req r0
|
||||
top .req r1
|
||||
y .req r2
|
||||
width .req r3
|
||||
pixel .req flags
|
||||
L .req r1
|
||||
R .req r2
|
||||
y .req r3
|
||||
type .req r12
|
||||
pixel .req flags
|
||||
|
||||
.extern rasterizeS_asm
|
||||
.extern rasterizeF_asm
|
||||
@ -22,22 +22,19 @@ type .req r12
|
||||
.global rasterize_asm
|
||||
rasterize_asm:
|
||||
and type, flags, #FACE_TYPE_MASK
|
||||
|
||||
cmp type, #FACE_TYPE_F
|
||||
streqb flags, [top, #VERTEX_CLIP]
|
||||
andeq R, flags, #0xFF // R = face color for FACE_TYPE_F
|
||||
movne R, L // R = L otherwise
|
||||
|
||||
ldr pixel, =fb
|
||||
ldr pixel, [pixel]
|
||||
ldrsh y, [top, #VERTEX_Y]
|
||||
ldrsh y, [L, #VERTEX_Y]
|
||||
|
||||
#if (FRAME_WIDTH == 240) // pixel += (y * 16 - y) * 16
|
||||
// pixel += y * 240 -> (y * 16 - y) * 16
|
||||
rsb y, y, y, lsl #4
|
||||
add pixel, pixel, y, lsl #4
|
||||
#else
|
||||
mov width, #FRAME_WIDTH
|
||||
mla pixel, y, width, pixel
|
||||
#endif
|
||||
|
||||
mov r2, top
|
||||
add pc, type, lsr #(FACE_TYPE_SHIFT - 2)
|
||||
nop
|
||||
b rasterizeS_asm
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
pixel .req r0
|
||||
L .req r1
|
||||
R .req r2
|
||||
color .req r2
|
||||
index .req r3
|
||||
Lh .req r4
|
||||
Rh .req r5
|
||||
@ -15,6 +15,7 @@ tmp .req r11
|
||||
DIVLUT .req r12
|
||||
width .req lr
|
||||
|
||||
R .req color
|
||||
h .req N
|
||||
Rxy .req tmp
|
||||
Ry2 .req Rh
|
||||
@ -22,19 +23,16 @@ Lxy .req tmp
|
||||
Ly2 .req Lh
|
||||
LMAP .req Lx
|
||||
pair .req DIVLUT
|
||||
blocks .req DIVLUT
|
||||
|
||||
.global rasterizeF_asm
|
||||
rasterizeF_asm:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
|
||||
mov LMAP, #LMAP_ADDR
|
||||
|
||||
// TODO use ldrh, swap g and clip
|
||||
add LMAP, color, #LMAP_ADDR
|
||||
ldrb tmp, [L, #VERTEX_G]
|
||||
ldrb index, [L, #VERTEX_CLIP]
|
||||
orr tmp, index, tmp, lsl #8 // tmp = index | (L->v.g << 8)
|
||||
ldrb index, [LMAP, tmp] // tmp = lightmap[tmp]
|
||||
ldrb index, [LMAP, tmp, lsl #8] // index = lightmap[color + L->v.g * 256]
|
||||
|
||||
mov R, L
|
||||
|
||||
mov Lh, #0 // Lh = 0
|
||||
mov Rh, #0 // Rh = 0
|
||||
|
@ -17,6 +17,8 @@ Lt .req r11
|
||||
Rt .req r12
|
||||
h .req lr
|
||||
|
||||
ptr .req tmp
|
||||
|
||||
Ldx .req h
|
||||
Rdx .req h
|
||||
|
||||
@ -41,14 +43,14 @@ duv .req R
|
||||
du .req L
|
||||
dv .req R
|
||||
|
||||
Lduv .req h
|
||||
Ldu .req N
|
||||
Ldv .req h
|
||||
|
||||
Rduv .req h
|
||||
Rdu .req N
|
||||
Rdv .req h
|
||||
|
||||
Rti .req indexB
|
||||
|
||||
sLdx .req tmp
|
||||
sLdt .req N
|
||||
sRdx .req Lh
|
||||
@ -58,32 +60,25 @@ SP_LDX = 0
|
||||
SP_LDT = 4
|
||||
SP_RDX = 8
|
||||
SP_RDT = 12
|
||||
SP_L = 16
|
||||
SP_R = 20
|
||||
SP_LH = 24
|
||||
SP_RH = 28
|
||||
SP_SIZE = 32
|
||||
|
||||
.macro PUT_PIXELS
|
||||
tex indexA, t
|
||||
lit indexA
|
||||
|
||||
#ifndef TEX_2PX
|
||||
add t, dtdx
|
||||
|
||||
tex indexB, t
|
||||
lit indexB
|
||||
add t, dtdx
|
||||
|
||||
orr indexA, indexB, lsl #8
|
||||
strh indexA, [tmp], #2
|
||||
#else
|
||||
add t, dtdx, lsl #1
|
||||
|
||||
//orr indexA, indexA, lsl #8
|
||||
strb indexA, [tmp], #2 // writing a byte to GBA VRAM will write a half word for free
|
||||
#endif
|
||||
strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
|
||||
.endm
|
||||
|
||||
.global rasterizeFT_asm
|
||||
rasterizeFT_asm:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
||||
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldt, Rdx, Rdt, L, R, Lh, Rh]
|
||||
|
||||
mov LMAP, #LMAP_ADDR
|
||||
ldrb tmp, [L, #VERTEX_G]
|
||||
@ -125,16 +120,9 @@ rasterizeFT_asm:
|
||||
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
|
||||
str Ldx, [sp, #SP_LDX] // store Ldx to stack
|
||||
|
||||
ldr Lduv, [L, #VERTEX_T]
|
||||
sub Lduv, Lt // Lduv = N->v.t - Lt
|
||||
asr Ldu, Lduv, #16
|
||||
mul Ldu, tmp // Rdu = tmp * int16(Lduv >> 16)
|
||||
lsl Ldv, Lduv, #16
|
||||
asr Ldv, #16
|
||||
mul Ldv, tmp // Rdv = tmp * int16(Lduv)
|
||||
lsr Ldu, #16
|
||||
lsl Ldu, #16
|
||||
orr Ldt, Ldu, Ldv, lsr #16 // Ldt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ldu, Ldv, tmp
|
||||
str Ldt, [sp, #SP_LDT] // store Ldt to stack
|
||||
.calc_left_end:
|
||||
|
||||
@ -166,16 +154,9 @@ rasterizeFT_asm:
|
||||
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
|
||||
str Rdx, [sp, #SP_RDX] // store Rdx to stack
|
||||
|
||||
ldr Rduv, [R, #VERTEX_T]
|
||||
sub Rduv, Rt // Rduv = N->v.t - Rt
|
||||
asr Rdu, Rduv, #16
|
||||
mul Rdu, tmp // Rdu = tmp * int16(Rduv >> 16)
|
||||
lsl Rdv, Rduv, #16
|
||||
asr Rdv, #16
|
||||
mul Rdv, tmp // Rdv = tmp * int16(Rduv)
|
||||
lsr Rdu, #16
|
||||
lsl Rdu, #16
|
||||
orr Rdt, Rdu, Rdv, lsr #16 // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rdu, Rdv, tmp
|
||||
str Rdt, [sp, #SP_RDT] // store Rdt to stack
|
||||
.calc_right_end:
|
||||
|
||||
@ -185,44 +166,36 @@ rasterizeFT_asm:
|
||||
sub Lh, h // Lh -= h
|
||||
sub Rh, h // Rh -= h
|
||||
|
||||
stmfd sp!, {L,R,Lh,Rh} // sp-16
|
||||
add tmp, sp, #SP_L
|
||||
stmia tmp, {L, R, Lh, Rh}
|
||||
|
||||
.scanline_start:
|
||||
asr tmp, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
ble .scanline_end // if (width <= 0) go next scanline
|
||||
|
||||
add tmp, pixel, tmp // tmp = pixel + x1
|
||||
add ptr, pixel, tmp // ptr = pixel + x1
|
||||
|
||||
mov DIVLUTi, #DIVLUT_ADDR
|
||||
lsl inv, width, #1
|
||||
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
|
||||
|
||||
sub duv, Rt, Lt // duv = Rt - Lt
|
||||
asr du, duv, #16
|
||||
mul du, inv // du = inv * int16(duv >> 16)
|
||||
lsl dv, duv, #16
|
||||
asr dv, #16
|
||||
mul dv, inv // dv = inv * int16(duv)
|
||||
lsr du, #16
|
||||
lsl du, #16
|
||||
orr dtdx, du, dv, lsr #16 // dtdx = (du & 0xFFFF0000) | (dv >> 16)
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, du, dv, inv
|
||||
|
||||
mov t, Lt // t = Lt
|
||||
|
||||
// 2 bytes alignment (VRAM write requirement)
|
||||
.align_left:
|
||||
tst tmp, #1 // if (tmp & 1)
|
||||
tst ptr, #1 // if (ptr & 1)
|
||||
beq .align_right
|
||||
ldrb indexB, [tmp, #-1]! // read pal index from VRAM (byte)
|
||||
|
||||
and indexA, t, #0xFF00
|
||||
orr indexA, t, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
ldrb indexA, [LMAP, indexA]
|
||||
tex indexA, t
|
||||
lit indexA
|
||||
|
||||
ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte)
|
||||
orr indexB, indexA, lsl #8
|
||||
strh indexB, [tmp], #2
|
||||
strh indexB, [ptr], #2
|
||||
add t, dtdx
|
||||
|
||||
subs width, #1 // width--
|
||||
@ -231,19 +204,15 @@ rasterizeFT_asm:
|
||||
.align_right:
|
||||
tst width, #1
|
||||
beq .align_block_4px
|
||||
ldrb indexB, [tmp, width]
|
||||
|
||||
sub Rti, Rt, dtdx
|
||||
tex indexA, Rti
|
||||
lit indexA
|
||||
|
||||
ldrb indexB, [ptr, width]
|
||||
subs width, #1 // width--
|
||||
|
||||
sub Rt, dtdx
|
||||
and indexA, Rt, #0xFF00
|
||||
orr indexA, Rt, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
add Rt, dtdx
|
||||
ldrb indexA, [TILE, indexA]
|
||||
ldrb indexA, [LMAP, indexA]
|
||||
|
||||
orr indexB, indexA, indexB, lsl #8
|
||||
strh indexB, [tmp, width]
|
||||
strh indexB, [ptr, width]
|
||||
|
||||
beq .scanline_end // if (width == 0)
|
||||
|
||||
@ -276,8 +245,7 @@ rasterizeFT_asm:
|
||||
bne .scanline_block_8px
|
||||
|
||||
.scanline_end:
|
||||
add tmp, sp, #16
|
||||
ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
|
||||
ldmia sp, {sLdx, sLdt, sRdx, sRdt}
|
||||
add Lx, sLdx
|
||||
add Lt, sLdt
|
||||
add Rx, sRdx
|
||||
@ -288,9 +256,10 @@ rasterizeFT_asm:
|
||||
subs h, #1
|
||||
bne .scanline_start
|
||||
|
||||
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
||||
add tmp, sp, #SP_L
|
||||
ldmia tmp, {L, R, Lh, Rh}
|
||||
b .loop
|
||||
|
||||
.exit:
|
||||
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
||||
add sp, #SP_SIZE // revert reserved space for [Ldx, Ldt, Rdx, Rdt, L, R, Lh, Rh]
|
||||
ldmfd sp!, {r4-r11, pc}
|
@ -17,6 +17,8 @@ Lt .req r11
|
||||
Rt .req r12
|
||||
h .req lr
|
||||
|
||||
ptr .req tmp
|
||||
|
||||
Ldx .req h
|
||||
Rdx .req h
|
||||
|
||||
@ -41,14 +43,14 @@ duv .req R
|
||||
du .req L
|
||||
dv .req R
|
||||
|
||||
Lduv .req h
|
||||
Ldu .req N
|
||||
Ldv .req h
|
||||
|
||||
Rduv .req h
|
||||
Rdu .req N
|
||||
Rdv .req h
|
||||
|
||||
Rti .req indexB
|
||||
|
||||
sLdx .req tmp
|
||||
sLdt .req N
|
||||
sRdx .req Lh
|
||||
@ -58,38 +60,25 @@ SP_LDX = 0
|
||||
SP_LDT = 4
|
||||
SP_RDX = 8
|
||||
SP_RDT = 12
|
||||
SP_L = 16
|
||||
SP_R = 20
|
||||
SP_LH = 24
|
||||
SP_RH = 28
|
||||
SP_SIZE = 32
|
||||
|
||||
.macro PUT_PIXELS
|
||||
#ifndef TEX_2PX
|
||||
tex indexA, t
|
||||
add t, dtdx
|
||||
|
||||
tex indexB, t
|
||||
add t, dtdx
|
||||
|
||||
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
||||
ands indexA, #255
|
||||
andnes indexB, #255
|
||||
orrne indexB, indexA, indexB, lsl #8 // indexB = indexA | (indexB << 8)
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
ldrneb indexB, [LMAP, indexB, lsr #8]
|
||||
orrne indexA, indexB, lsl #8
|
||||
strneh indexA, [tmp]
|
||||
add tmp, #2
|
||||
#else
|
||||
tex indexA, t
|
||||
add t, dtdx, lsl #1
|
||||
cmp indexA, #0
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
strneb indexA, [tmp]
|
||||
add tmp, #2
|
||||
#endif
|
||||
strneb indexA, [ptr]
|
||||
add ptr, #2
|
||||
.endm
|
||||
|
||||
.global rasterizeFTA_asm
|
||||
rasterizeFTA_asm:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, #16 // reserve stack space for [Ldx, Ldt, Rdx, Rdt]
|
||||
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldt, Rdx, Rdt, L, R, Lh, Rh]
|
||||
|
||||
mov LMAP, #LMAP_ADDR
|
||||
ldrb tmp, [L, #VERTEX_G]
|
||||
@ -131,16 +120,9 @@ rasterizeFTA_asm:
|
||||
mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx)
|
||||
str Ldx, [sp, #SP_LDX] // store Ldx to stack
|
||||
|
||||
ldr Lduv, [L, #VERTEX_T]
|
||||
sub Lduv, Lt // Lduv = N->v.t - Lt
|
||||
asr Ldu, Lduv, #16
|
||||
mul Ldu, tmp // Rdu = tmp * int16(Lduv >> 16)
|
||||
lsl Ldv, Lduv, #16
|
||||
asr Ldv, #16
|
||||
mul Ldv, tmp // Rdv = tmp * int16(Lduv)
|
||||
lsr Ldu, #16
|
||||
lsl Ldu, #16
|
||||
orr Ldt, Ldu, Ldv, lsr #16 // Ldt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ldu, Ldv, tmp
|
||||
str Ldt, [sp, #SP_LDT] // store Ldt to stack
|
||||
.calc_left_end:
|
||||
|
||||
@ -172,16 +154,9 @@ rasterizeFTA_asm:
|
||||
mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx)
|
||||
str Rdx, [sp, #SP_RDX] // store Rdx to stack
|
||||
|
||||
ldr Rduv, [R, #VERTEX_T]
|
||||
sub Rduv, Rt // Rduv = N->v.t - Rt
|
||||
asr Rdu, Rduv, #16
|
||||
mul Rdu, tmp // Rdu = tmp * int16(Rduv >> 16)
|
||||
lsl Rdv, Rduv, #16
|
||||
asr Rdv, #16
|
||||
mul Rdv, tmp // Rdv = tmp * int16(Rduv)
|
||||
lsr Rdu, #16
|
||||
lsl Rdu, #16
|
||||
orr Rdt, Rdu, Rdv, lsr #16 // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rdu, Rdv, tmp
|
||||
str Rdt, [sp, #SP_RDT] // store Rdt to stack
|
||||
.calc_right_end:
|
||||
|
||||
@ -191,46 +166,38 @@ rasterizeFTA_asm:
|
||||
sub Lh, h // Lh -= h
|
||||
sub Rh, h // Rh -= h
|
||||
|
||||
stmfd sp!, {L,R,Lh,Rh} // sp-16
|
||||
add tmp, sp, #SP_L
|
||||
stmia tmp, {L, R, Lh, Rh}
|
||||
|
||||
.scanline_start:
|
||||
asr tmp, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
ble .scanline_end // if (width <= 0) go next scanline
|
||||
|
||||
add tmp, pixel, tmp // tmp = pixel + x1
|
||||
add ptr, pixel, tmp // ptr = pixel + x1
|
||||
|
||||
mov DIVLUTi, #DIVLUT_ADDR
|
||||
lsl inv, width, #1
|
||||
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
|
||||
|
||||
sub duv, Rt, Lt // duv = Rt - Lt
|
||||
asr du, duv, #16
|
||||
mul du, inv // du = inv * int16(duv >> 16)
|
||||
lsl dv, duv, #16
|
||||
asr dv, #16
|
||||
mul dv, inv // dv = inv * int16(duv)
|
||||
lsr du, #16
|
||||
lsl du, #16
|
||||
orr dtdx, du, dv, lsr #16 // dtdx = (du & 0xFFFF0000) | (dv >> 16)
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, du, dv, inv
|
||||
|
||||
mov t, Lt // t = Lt
|
||||
|
||||
// 2 bytes alignment (VRAM write requirement)
|
||||
.align_left:
|
||||
tst tmp, #1 // if (tmp & 1)
|
||||
tst ptr, #1 // if (ptr & 1)
|
||||
beq .align_right
|
||||
|
||||
and indexA, t, #0xFF00
|
||||
orr indexA, t, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
tex indexA, t
|
||||
|
||||
cmp indexA, #0
|
||||
ldrneb indexB, [tmp, #-1]! // read pal index from VRAM (byte)
|
||||
ldrneb indexB, [ptr, #-1]! // read pal index from VRAM (byte)
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
orrne indexB, indexA, lsl #8
|
||||
strneh indexB, [tmp], #2
|
||||
addeq tmp, #1
|
||||
strneh indexB, [ptr], #2
|
||||
addeq ptr, #1
|
||||
add t, dtdx
|
||||
|
||||
subs width, #1 // width--
|
||||
@ -240,17 +207,14 @@ rasterizeFTA_asm:
|
||||
tst width, #1
|
||||
beq .align_block_4px
|
||||
|
||||
sub Rt, dtdx
|
||||
and indexA, Rt, #0xFF00
|
||||
orr indexA, Rt, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
add Rt, dtdx
|
||||
ldrb indexA, [TILE, indexA]
|
||||
sub Rti, Rt, dtdx
|
||||
tex indexA, Rti
|
||||
|
||||
cmp indexA, #0
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
ldrneb indexB, [tmp, width]
|
||||
ldrneb indexB, [ptr, width]
|
||||
orrne indexB, indexA, indexB, lsl #8
|
||||
addne indexA, tmp, width
|
||||
addne indexA, ptr, width
|
||||
strneh indexB, [indexA, #-1]
|
||||
|
||||
subs width, #1 // width--
|
||||
@ -285,8 +249,7 @@ rasterizeFTA_asm:
|
||||
bne .scanline_block_8px
|
||||
|
||||
.scanline_end:
|
||||
add tmp, sp, #16
|
||||
ldmia tmp, {sLdx, sLdt, sRdx, sRdt}
|
||||
ldmia sp, {sLdx, sLdt, sRdx, sRdt}
|
||||
add Lx, sLdx
|
||||
add Lt, sLdt
|
||||
add Rx, sRdx
|
||||
@ -297,9 +260,10 @@ rasterizeFTA_asm:
|
||||
subs h, #1
|
||||
bne .scanline_start
|
||||
|
||||
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
||||
add tmp, sp, #SP_L
|
||||
ldmia tmp, {L, R, Lh, Rh}
|
||||
b .loop
|
||||
|
||||
.exit:
|
||||
add sp, #16 // revert reserved space for [Ldx, Ldt, Rdx, Rdt]
|
||||
add sp, #SP_SIZE // revert reserved space for [Ldx, Ldt, Rdx, Rdt, L, R, Lh, Rh]
|
||||
ldmfd sp!, {r4-r11, pc}
|
@ -35,7 +35,7 @@ Ldt .req h
|
||||
Rdt .req h
|
||||
|
||||
indexA .req Lh
|
||||
indexB .req Rh
|
||||
indexB .req tmp
|
||||
|
||||
Rxy .req tmp
|
||||
Ry2 .req Rh
|
||||
@ -47,23 +47,19 @@ DIVLUT .req N
|
||||
DIVLUTi .req tmp
|
||||
|
||||
ptr .req Lx
|
||||
width .req Rx
|
||||
width .req Rh
|
||||
|
||||
g .req Lg
|
||||
dgdx .req Rg
|
||||
dgdx .req L
|
||||
|
||||
t .req Lt
|
||||
dtdx .req Rt
|
||||
|
||||
duv .req R
|
||||
dtdx .req R
|
||||
du .req L
|
||||
dv .req R
|
||||
|
||||
Lduv .req N
|
||||
Ldu .req TILE
|
||||
Ldv .req N
|
||||
|
||||
Rduv .req N
|
||||
Rdu .req TILE
|
||||
Rdv .req N
|
||||
|
||||
@ -75,7 +71,7 @@ sLdg .req R
|
||||
sLdt .req Lh
|
||||
sRdx .req Rh
|
||||
sRdg .req tmp
|
||||
sRdt .req N // not used in ldm due h collision
|
||||
sRdt .req tmp // not enough regs for one ldmia
|
||||
|
||||
SP_LDX = 0
|
||||
SP_LDG = 4
|
||||
@ -83,6 +79,12 @@ SP_LDT = 8
|
||||
SP_RDX = 12
|
||||
SP_RDG = 16
|
||||
SP_RDT = 20
|
||||
SP_L = 24
|
||||
SP_R = 28
|
||||
SP_LH = 32
|
||||
SP_RH = 36
|
||||
SP_SIZE = 40
|
||||
SP_TILE = SP_SIZE
|
||||
|
||||
.macro PUT_PIXELS
|
||||
bic LMAP, g, #255
|
||||
@ -91,28 +93,18 @@ SP_RDT = 20
|
||||
tex indexA, t
|
||||
lit indexA
|
||||
|
||||
#ifndef TEX_2PX
|
||||
add t, dtdx
|
||||
|
||||
tex indexB, t
|
||||
lit indexB
|
||||
|
||||
add t, dtdx
|
||||
|
||||
orr indexA, indexB, lsl #8
|
||||
strh indexA, [ptr], #2
|
||||
#else
|
||||
add t, dtdx, lsl #1
|
||||
|
||||
//orr indexA, indexA, lsl #8
|
||||
strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.global rasterizeGT_asm
|
||||
rasterizeGT_asm:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||
ldr r3, =gTile
|
||||
ldr r3, [r3]
|
||||
|
||||
stmfd sp!, {r3-r11, lr}
|
||||
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, L, R, Lh, Rh]
|
||||
|
||||
mov Lh, #0 // Lh = 0
|
||||
mov Rh, #0 // Rh = 0
|
||||
@ -155,16 +147,9 @@ rasterizeGT_asm:
|
||||
asr Ldg, #8 // 8-bit for fractional part
|
||||
str Ldg, [sp, #SP_LDG] // store Ldg to stack
|
||||
|
||||
ldr Lduv, [L, #VERTEX_T]
|
||||
sub Lduv, Lt // Lduv = N->v.t - Lt
|
||||
asr Ldu, Lduv, #16
|
||||
mul Ldu, tmp // Rdu = tmp * int16(Lduv >> 16)
|
||||
lsl Ldv, Lduv, #16
|
||||
asr Ldv, #16
|
||||
mul Ldv, tmp // Rdv = tmp * int16(Lduv)
|
||||
lsr Ldu, #16
|
||||
lsl Ldu, #16
|
||||
orr Ldt, Ldu, Ldv, lsr #16 // Ldt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ldu, Ldv, tmp
|
||||
str Ldt, [sp, #SP_LDT] // store Ldt to stack
|
||||
.calc_left_end:
|
||||
|
||||
@ -204,16 +189,9 @@ rasterizeGT_asm:
|
||||
asr Rdg, #8 // 8-bit for fractional part
|
||||
str Rdg, [sp, #SP_RDG] // store Rdg to stack
|
||||
|
||||
ldr Rduv, [R, #VERTEX_T]
|
||||
sub Rduv, Rt // Rduv = N->v.t - Rt
|
||||
asr Rdu, Rduv, #16
|
||||
mul Rdu, tmp // Rdu = tmp * int16(Rduv >> 16)
|
||||
lsl Rdv, Rduv, #16
|
||||
asr Rdv, #16
|
||||
mul Rdv, tmp // Rdv = tmp * int16(Rduv)
|
||||
lsr Rdu, #16
|
||||
lsl Rdu, #16
|
||||
orr Rdt, Rdu, Rdv, lsr #16 // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rdu, Rdv, tmp
|
||||
str Rdt, [sp, #SP_RDT] // store Rdt to stack
|
||||
.calc_right_end:
|
||||
|
||||
@ -226,54 +204,44 @@ rasterizeGT_asm:
|
||||
sub Lh, h // Lh -= h
|
||||
sub Rh, h // Rh -= h
|
||||
|
||||
ldr TILE, =gTile
|
||||
ldr TILE, [TILE]
|
||||
ldr TILE, [sp, #SP_TILE]
|
||||
|
||||
stmfd sp!, {L,R,Lh,Rh} // sp-16
|
||||
add tmp, sp, #SP_L
|
||||
stmia tmp, {L, R, Lh, Rh}
|
||||
|
||||
.scanline_start:
|
||||
stmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp-24
|
||||
stmfd sp!, {Lx, Lg, Lt}
|
||||
|
||||
asr tmp, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
asr Lx, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, Lx, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
ble .scanline_end // if (width <= 0) go next scanline
|
||||
|
||||
add ptr, pixel, tmp // ptr = pixel + x1
|
||||
add ptr, pixel, Lx // ptr = pixel + x1
|
||||
|
||||
mov DIVLUTi, #DIVLUT_ADDR
|
||||
lsl inv, width, #1
|
||||
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, du, dv, inv
|
||||
// t == Lt (alias)
|
||||
|
||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
||||
asr dgdx, #15 // dgdx >>= 15
|
||||
// g == Lg (alias)
|
||||
|
||||
sub duv, Rt, Lt // duv = Rt - Lt
|
||||
asr du, duv, #16
|
||||
mul du, inv // du = inv * int16(duv >> 16)
|
||||
lsl dv, duv, #16
|
||||
asr dv, #16
|
||||
mul dv, inv // dv = inv * int16(duv)
|
||||
lsr du, #16
|
||||
lsl du, #16
|
||||
orr dtdx, du, dv, lsr #16 // dtdx = (du & 0xFFFF0000) | (dv >> 16)
|
||||
// t == Lt (alias)
|
||||
|
||||
// 2 bytes alignment (VRAM write requirement)
|
||||
.align_left:
|
||||
tst ptr, #1 // if (ptr & 1)
|
||||
beq .align_right
|
||||
ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte)
|
||||
|
||||
bic LMAP, g, #255
|
||||
add g, dgdx, asr #1
|
||||
tex indexA, t
|
||||
lit indexA
|
||||
|
||||
and indexA, t, #0xFF00
|
||||
orr indexA, t, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
ldrb indexA, [LMAP, indexA]
|
||||
|
||||
ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte)
|
||||
orr indexB, indexA, lsl #8
|
||||
strh indexB, [ptr], #2
|
||||
add t, dtdx
|
||||
@ -284,21 +252,16 @@ rasterizeGT_asm:
|
||||
.align_right:
|
||||
tst width, #1
|
||||
beq .align_block_4px
|
||||
ldrb indexB, [ptr, width]
|
||||
|
||||
subs width, #1 // width--
|
||||
sub Rti, Rt, dtdx
|
||||
tex indexA, Rti
|
||||
|
||||
mla Rti, width, dtdx, t // Rti = width * dtdx + t
|
||||
and indexA, Rti, #0xFF00
|
||||
orr indexA, Rti, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
|
||||
asr Rgi, dgdx, #1
|
||||
mla Rgi, width, Rgi, g // Rgi = width * (dgdx / 2) + g
|
||||
sub Rgi, Rg, dgdx, asr #1
|
||||
bic LMAP, Rgi, #255
|
||||
lit indexA
|
||||
|
||||
ldrb indexA, [LMAP, indexA]
|
||||
|
||||
ldrb indexB, [ptr, width]
|
||||
subs width, #1 // width--
|
||||
orr indexB, indexA, indexB, lsl #8
|
||||
strh indexB, [ptr, width]
|
||||
|
||||
@ -333,10 +296,9 @@ rasterizeGT_asm:
|
||||
bne .scanline_block_8px
|
||||
|
||||
.scanline_end:
|
||||
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
||||
ldmfd sp!, {Lx, Lg, Lt}
|
||||
|
||||
add tmp, sp, #16
|
||||
ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||
ldmia sp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||
|
||||
add Lx, sLdx
|
||||
add Lg, sLdg
|
||||
@ -344,17 +306,18 @@ rasterizeGT_asm:
|
||||
add Rx, sRdx
|
||||
add Rg, sRdg
|
||||
|
||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
||||
add Rt, tmp // Rt += Rdt from stack
|
||||
ldr sRdt, [sp, #SP_RDT]
|
||||
add Rt, sRdt
|
||||
|
||||
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
|
||||
|
||||
subs h, #1
|
||||
bne .scanline_start
|
||||
|
||||
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
||||
add tmp, sp, #SP_L
|
||||
ldmia tmp, {L, R, Lh, Rh}
|
||||
b .loop
|
||||
|
||||
.exit:
|
||||
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||
add sp, #(SP_SIZE + 4) // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, L, R, Lh, Rh, TILE]
|
||||
ldmfd sp!, {r4-r11, pc}
|
@ -35,7 +35,7 @@ Ldt .req h
|
||||
Rdt .req h
|
||||
|
||||
indexA .req Lh
|
||||
indexB .req Rh
|
||||
indexB .req tmp
|
||||
|
||||
Rxy .req tmp
|
||||
Ry2 .req Rh
|
||||
@ -47,23 +47,21 @@ DIVLUT .req N
|
||||
DIVLUTi .req tmp
|
||||
|
||||
ptr .req Lx
|
||||
width .req Rx
|
||||
width .req Rh
|
||||
|
||||
g .req Lg
|
||||
dgdx .req Rg
|
||||
dgdx .req L
|
||||
|
||||
t .req Lt
|
||||
dtdx .req Rt
|
||||
dtdx .req R
|
||||
|
||||
duv .req R
|
||||
du .req L
|
||||
dv .req R
|
||||
|
||||
Lduv .req N
|
||||
Ldu .req TILE
|
||||
Ldv .req N
|
||||
|
||||
Rduv .req N
|
||||
Rdu .req TILE
|
||||
Rdv .req N
|
||||
|
||||
@ -75,7 +73,7 @@ sLdg .req R
|
||||
sLdt .req Lh
|
||||
sRdx .req Rh
|
||||
sRdg .req tmp
|
||||
sRdt .req N // not used in ldm due h collision
|
||||
sRdt .req tmp // not enough regs for one ldmia
|
||||
|
||||
SP_LDX = 0
|
||||
SP_LDG = 4
|
||||
@ -83,41 +81,32 @@ SP_LDT = 8
|
||||
SP_RDX = 12
|
||||
SP_RDG = 16
|
||||
SP_RDT = 20
|
||||
SP_L = 24
|
||||
SP_R = 28
|
||||
SP_LH = 32
|
||||
SP_RH = 36
|
||||
SP_SIZE = 40
|
||||
SP_TILE = SP_SIZE
|
||||
|
||||
.macro PUT_PIXELS
|
||||
bic LMAP, g, #255
|
||||
add g, dgdx
|
||||
|
||||
#ifndef TEX_2PX
|
||||
tex indexA, t
|
||||
add t, dtdx
|
||||
|
||||
tex indexB, t
|
||||
add t, dtdx
|
||||
|
||||
// cheap non-accurate alpha test, skip pixels pair if one or both are transparent
|
||||
ands indexA, #255
|
||||
andnes indexB, #255
|
||||
orrne indexB, indexA, indexB, lsl #8 // indexB = indexA | (indexB << 8)
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
ldrneb indexB, [LMAP, indexB, lsr #8]
|
||||
orrne indexA, indexB, lsl #8
|
||||
strneh indexA, [ptr]
|
||||
#else
|
||||
tex indexA, t
|
||||
add t, dtdx, lsl #1
|
||||
cmp indexA, #0
|
||||
ldrneb indexA, [LMAP, indexA]
|
||||
strneb indexA, [ptr]
|
||||
#endif
|
||||
|
||||
add ptr, #2
|
||||
.endm
|
||||
|
||||
.global rasterizeGTA_asm
|
||||
rasterizeGTA_asm:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, #24 // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||
ldr r3, =gTile
|
||||
ldr r3, [r3]
|
||||
|
||||
stmfd sp!, {r3-r11, lr}
|
||||
sub sp, #SP_SIZE // reserve stack space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, L, R, Lh, Rh]
|
||||
|
||||
mov Lh, #0 // Lh = 0
|
||||
mov Rh, #0 // Rh = 0
|
||||
@ -160,16 +149,9 @@ rasterizeGTA_asm:
|
||||
asr Ldg, #8 // 8-bit for fractional part
|
||||
str Ldg, [sp, #SP_LDG] // store Ldg to stack
|
||||
|
||||
ldr Lduv, [L, #VERTEX_T]
|
||||
sub Lduv, Lt // Lduv = N->v.t - Lt
|
||||
asr Ldu, Lduv, #16
|
||||
mul Ldu, tmp // Rdu = tmp * int16(Lduv >> 16)
|
||||
lsl Ldv, Lduv, #16
|
||||
asr Ldv, #16
|
||||
mul Ldv, tmp // Rdv = tmp * int16(Lduv)
|
||||
lsr Ldu, #16
|
||||
lsl Ldu, #16
|
||||
orr Ldt, Ldu, Ldv, lsr #16 // Ldt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ldu, Ldv, tmp
|
||||
str Ldt, [sp, #SP_LDT] // store Ldt to stack
|
||||
.calc_left_end:
|
||||
|
||||
@ -209,16 +191,9 @@ rasterizeGTA_asm:
|
||||
asr Rdg, #8 // 8-bit for fractional part
|
||||
str Rdg, [sp, #SP_RDG] // store Rdg to stack
|
||||
|
||||
ldr Rduv, [R, #VERTEX_T]
|
||||
sub Rduv, Rt // Rduv = N->v.t - Rt
|
||||
asr Rdu, Rduv, #16
|
||||
mul Rdu, tmp // Rdu = tmp * int16(Rduv >> 16)
|
||||
lsl Rdv, Rduv, #16
|
||||
asr Rdv, #16
|
||||
mul Rdv, tmp // Rdv = tmp * int16(Rduv)
|
||||
lsr Rdu, #16
|
||||
lsl Rdu, #16
|
||||
orr Rdt, Rdu, Rdv, lsr #16 // Rdt = (Rdu & 0xFFFF0000) | (Rdv >> 16)
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rdu, Rdv, tmp
|
||||
str Rdt, [sp, #SP_RDT] // store Rdt to stack
|
||||
.calc_right_end:
|
||||
|
||||
@ -231,48 +206,39 @@ rasterizeGTA_asm:
|
||||
sub Lh, h // Lh -= h
|
||||
sub Rh, h // Rh -= h
|
||||
|
||||
ldr TILE, =gTile
|
||||
ldr TILE, [TILE]
|
||||
ldr TILE, [sp, #SP_TILE]
|
||||
|
||||
stmfd sp!, {L,R,Lh,Rh} // sp-16
|
||||
add tmp, sp, #SP_L
|
||||
stmia tmp, {L, R, Lh, Rh}
|
||||
|
||||
.scanline_start:
|
||||
stmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp-24
|
||||
stmfd sp!, {Lx, Lg, Lt}
|
||||
|
||||
asr tmp, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, tmp, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
asr Lx, Lx, #16 // x1 = (Lx >> 16)
|
||||
rsbs width, Lx, Rx, asr #16 // width = (Rx >> 16) - x1
|
||||
ble .scanline_end // if (width <= 0) go next scanline
|
||||
|
||||
add ptr, pixel, tmp // ptr = pixel + x1
|
||||
add ptr, pixel, Lx // ptr = pixel + x1
|
||||
|
||||
mov DIVLUTi, #DIVLUT_ADDR
|
||||
lsl inv, width, #1
|
||||
ldrh inv, [DIVLUTi, inv] // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, du, dv, inv
|
||||
// t == Lt (alias)
|
||||
|
||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
||||
asr dgdx, #15 // dgdx >>= 15
|
||||
// g == Lg (alias)
|
||||
|
||||
sub duv, Rt, Lt // duv = Rt - Lt
|
||||
asr du, duv, #16
|
||||
mul du, inv // du = inv * int16(duv >> 16)
|
||||
lsl dv, duv, #16
|
||||
asr dv, #16
|
||||
mul dv, inv // dv = inv * int16(duv)
|
||||
lsr du, #16
|
||||
lsl du, #16
|
||||
orr dtdx, du, dv, lsr #16 // dtdx = (du & 0xFFFF0000) | (dv >> 16)
|
||||
// t == Lt (alias)
|
||||
|
||||
// 2 bytes alignment (VRAM write requirement)
|
||||
.align_left:
|
||||
tst ptr, #1 // if (ptr & 1)
|
||||
beq .align_right
|
||||
|
||||
and indexA, t, #0xFF00
|
||||
orr indexA, t, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
tex indexA, t
|
||||
|
||||
cmp indexA, #0
|
||||
beq .skip_left
|
||||
@ -296,29 +262,24 @@ rasterizeGTA_asm:
|
||||
tst width, #1
|
||||
beq .align_block_4px
|
||||
|
||||
ldrb indexB, [ptr, width]
|
||||
|
||||
sub width, #1 // width--
|
||||
|
||||
mla Rti, width, dtdx, t // Rti = width * dtdx + t
|
||||
and indexA, Rti, #0xFF00
|
||||
orr indexA, Rti, lsr #24 // res = (t & 0xFF00) | (t >> 24)
|
||||
ldrb indexA, [TILE, indexA]
|
||||
sub Rti, Rt, dtdx
|
||||
tex indexA, Rti
|
||||
|
||||
cmp indexA, #0
|
||||
subeq width, #1
|
||||
beq .skip_right
|
||||
|
||||
asr Rgi, dgdx, #1
|
||||
mla Rgi, width, Rgi, g // Rgi = width * (dgdx / 2) + g
|
||||
sub Rgi, Rg, dgdx, asr #1
|
||||
bic LMAP, Rgi, #255
|
||||
lit indexA
|
||||
|
||||
ldrb indexA, [LMAP, indexA]
|
||||
|
||||
ldrb indexB, [ptr, width]
|
||||
sub width, #1 // width--
|
||||
orr indexB, indexA, indexB, lsl #8
|
||||
strh indexB, [ptr, width]
|
||||
|
||||
.skip_right:
|
||||
cmp width, #0 // width--
|
||||
cmp width, #0
|
||||
beq .scanline_end // if (width == 0)
|
||||
|
||||
.align_block_4px:
|
||||
@ -350,10 +311,9 @@ rasterizeGTA_asm:
|
||||
bne .scanline_block_8px
|
||||
|
||||
.scanline_end:
|
||||
ldmfd sp!, {Lx,Rx,Lg,Rg,Lt,Rt} // sp+24
|
||||
ldmfd sp!, {Lx, Lg, Lt}
|
||||
|
||||
add tmp, sp, #16
|
||||
ldmia tmp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||
ldmia sp, {sLdx, sLdg, sLdt, sRdx, sRdg}
|
||||
|
||||
add Lx, sLdx
|
||||
add Lg, sLdg
|
||||
@ -361,17 +321,18 @@ rasterizeGTA_asm:
|
||||
add Rx, sRdx
|
||||
add Rg, sRdg
|
||||
|
||||
ldr tmp, [sp, #(SP_RDT + 16)]
|
||||
add Rt, tmp // Rt += Rdt from stack
|
||||
ldr sRdt, [sp, #SP_RDT]
|
||||
add Rt, sRdt
|
||||
|
||||
add pixel, #FRAME_WIDTH // pixel += FRAME_WIDTH (240)
|
||||
|
||||
subs h, #1
|
||||
bne .scanline_start
|
||||
|
||||
ldmfd sp!, {L,R,Lh,Rh} // sp+16
|
||||
add tmp, sp, #SP_L
|
||||
ldmia tmp, {L, R, Lh, Rh}
|
||||
b .loop
|
||||
|
||||
.exit:
|
||||
add sp, #24 // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt]
|
||||
add sp, #(SP_SIZE + 4) // revert reserved space for [Ldx, Ldg, Ldt, Rdx, Rdg, Rdt, L, R, Lh, Rh, TILE]
|
||||
ldmfd sp!, {r4-r11, pc}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -153,7 +153,8 @@ void rasterizeS_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
|
||||
void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
{
|
||||
uint16 color = gLightmap[(L->v.g << 8) | L->v.clip];
|
||||
uint32 color = (uint32)R;
|
||||
color = gLightmap[(L->v.g << 8) | color];
|
||||
color |= (color << 8);
|
||||
|
||||
int32 Lh = 0;
|
||||
@ -163,6 +164,8 @@ void rasterizeF_c(uint16* pixel, const VertexLink* L, const VertexLink* R)
|
||||
int32 Rx;
|
||||
int32 Lx;
|
||||
|
||||
R = L;
|
||||
|
||||
while (1)
|
||||
{
|
||||
while (!Lh)
|
||||
|
@ -49,12 +49,12 @@ enum FaceType {
|
||||
FACE_TYPE_MAX
|
||||
};
|
||||
|
||||
#define FACE_TRIANGLE (1 << 13)
|
||||
#define FACE_TRIANGLE (1 << 19)
|
||||
#define FACE_CLIPPED (1 << 18)
|
||||
#define FACE_TYPE_SHIFT 14
|
||||
#define FACE_TYPE_MASK 15
|
||||
#define FACE_GOURAUD (2 << FACE_TYPE_SHIFT)
|
||||
#define FACE_TEXTURE 0x1FFF
|
||||
#define FACE_TEXTURE 0x3FFF
|
||||
|
||||
#include "rasterizer.h"
|
||||
|
||||
@ -411,6 +411,7 @@ void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count)
|
||||
if (g0 != g1 || g0 != g2) {
|
||||
flags += FACE_GOURAUD;
|
||||
}
|
||||
flags |= FACE_TRIANGLE;
|
||||
|
||||
if (checkBackface(v0, v1, v2))
|
||||
continue;
|
||||
@ -487,6 +488,7 @@ void faceAddMeshTriangles_c(const MeshTriangle* polys, int32 count)
|
||||
if ((c0 | c1 | c2) & CLIP_MASK_VP) {
|
||||
flags |= FACE_CLIPPED;
|
||||
}
|
||||
flags |= FACE_TRIANGLE;
|
||||
|
||||
int32 depth = (v0->z + v1->z + v2->z + v2->z) >> (2 + OT_SHIFT);
|
||||
|
||||
@ -634,11 +636,9 @@ X_NOINLINE void rasterize_c(uint32 flags, VertexLink* top)
|
||||
|
||||
uint32 type = (flags >> FACE_TYPE_SHIFT) & FACE_TYPE_MASK;
|
||||
|
||||
if (type == FACE_TYPE_F) {
|
||||
top->v.clip = flags; // use tex coord as color index for untextured polys
|
||||
}
|
||||
VertexLink* R = (type == FACE_TYPE_F) ? (VertexLink*)(flags & 0xFF) : top;
|
||||
|
||||
gRasterProc[type]((uint16*)pixel, top, top);
|
||||
gRasterProc[type]((uint16*)pixel, top, R);
|
||||
}
|
||||
|
||||
void flush_c()
|
||||
|
Loading…
x
Reference in New Issue
Block a user