mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-01 02:40:43 +02:00
#368 GBA conditional mul in rasterizer
This commit is contained in:
@@ -121,11 +121,11 @@
|
||||
.endm
|
||||
|
||||
.macro scaleUV uv, tmp, tmp2, f
|
||||
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
|
||||
smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
|
||||
|
||||
lsl \uv, #16
|
||||
asr \uv, #16
|
||||
mul \uv, \f // v = f * int16(uv)
|
||||
asrs \uv, #16
|
||||
mulne \uv, \f // v = f * int16(uv)
|
||||
lsr \uv, #16
|
||||
|
||||
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
|
||||
|
@@ -25,8 +25,6 @@ Lxy .req tmp
|
||||
Ly2 .req Lh
|
||||
LMAP .req Lx
|
||||
ptr .req tmp
|
||||
Ltmp .req N
|
||||
Rtmp .req N
|
||||
|
||||
.global rasterizeF_asm
|
||||
rasterizeF_asm:
|
||||
@@ -57,9 +55,9 @@ rasterizeF_asm:
|
||||
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
ldrsh Ltmp, [L, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x)
|
||||
ldrsh Ldx, [L, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - L->v.x)
|
||||
.calc_left_end:
|
||||
|
||||
cmp Rh, #0
|
||||
@@ -81,9 +79,9 @@ rasterizeF_asm:
|
||||
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
ldrsh Rtmp, [R, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [R, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
.calc_right_end:
|
||||
|
||||
cmp Rh, Lh // if (Rh < Lh)
|
||||
|
@@ -95,12 +95,12 @@ rasterizeFT_asm:
|
||||
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
ldrsh Ltmp, [L, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
||||
ldrsh Ldx, [L, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||
.calc_left_end:
|
||||
|
||||
@@ -125,12 +125,12 @@ rasterizeFT_asm:
|
||||
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
ldrsh Rtmp, [R, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [R, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||
.calc_right_end:
|
||||
|
||||
@@ -153,7 +153,7 @@ rasterizeFT_asm:
|
||||
|
||||
divLUT inv, width // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // duv = Rt - Lt
|
||||
subs dtdx, Rt, Lt // duv = Rt - Lt
|
||||
scaleUV dtdx, dtmp, dtmp2, inv
|
||||
|
||||
mov t, Lt // t = Lt
|
||||
|
@@ -96,12 +96,12 @@ rasterizeFTA_asm:
|
||||
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
ldrsh Ltmp, [L, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
||||
ldrsh Ldx, [L, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||
|
||||
ldr Ldt, [L, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||
.calc_left_end:
|
||||
|
||||
@@ -126,12 +126,12 @@ rasterizeFTA_asm:
|
||||
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
ldrsh Rtmp, [R, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [R, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
|
||||
ldr Rdt, [R, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||
.calc_right_end:
|
||||
|
||||
@@ -154,7 +154,7 @@ rasterizeFTA_asm:
|
||||
|
||||
divLUT inv, width // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // duv = Rt - Lt
|
||||
subs dtdx, Rt, Lt // duv = Rt - Lt
|
||||
scaleUV dtdx, dtmp, dtmp2, inv
|
||||
|
||||
mov t, Lt // t = Lt
|
||||
|
@@ -107,17 +107,17 @@ rasterizeGT_asm:
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
fiq_on
|
||||
ldrsh Ltmp, [N, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
||||
ldrsh Ldx, [N, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||
|
||||
ldrb Ltmp, [N, #VERTEX_G]
|
||||
sub Ltmp, Lg, lsr #(8 + G_EXTRA)
|
||||
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
|
||||
ldrb Ldg, [N, #VERTEX_G]
|
||||
subs Ldg, Lg, lsr #(8 + G_EXTRA)
|
||||
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
|
||||
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
||||
|
||||
ldr Ldt, [N, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||
fiq_off
|
||||
.calc_left_end:
|
||||
@@ -146,17 +146,17 @@ rasterizeGT_asm:
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
fiq_on
|
||||
ldrsh Rtmp, [N, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [N, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
|
||||
ldrb Rtmp, [N, #VERTEX_G]
|
||||
sub Rtmp, Rg, lsr #(8 + G_EXTRA)
|
||||
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
|
||||
ldrb Rdg, [N, #VERTEX_G]
|
||||
subs Rdg, Rg, lsr #(8 + G_EXTRA)
|
||||
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
|
||||
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
||||
|
||||
ldr Rdt, [N, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||
fiq_off
|
||||
.calc_right_end:
|
||||
@@ -183,12 +183,12 @@ rasterizeGT_asm:
|
||||
|
||||
divLUT inv, width // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
subs dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, dtmp, dtmp2, inv
|
||||
// t == Lt (alias)
|
||||
|
||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
||||
subs dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mulne dgdx, inv // dgdx *= FixedInvU(width)
|
||||
asr dgdx, #16 // dgdx >>= 16
|
||||
// g == Lg (alias)
|
||||
|
||||
|
@@ -106,17 +106,17 @@ rasterizeGTA_asm:
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
fiq_on
|
||||
ldrsh Ltmp, [N, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
||||
ldrsh Ldx, [N, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||
|
||||
ldrb Ltmp, [N, #VERTEX_G]
|
||||
sub Ltmp, Lg, lsr #8
|
||||
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
|
||||
ldrb Ldg, [N, #VERTEX_G]
|
||||
subs Ldg, Lg, lsr #8
|
||||
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
|
||||
asr Ldg, #8 // 8-bit for fractional part
|
||||
|
||||
ldr Ldt, [N, #VERTEX_T]
|
||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
||||
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||
fiq_off
|
||||
.calc_left_end:
|
||||
@@ -145,17 +145,17 @@ rasterizeGTA_asm:
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
fiq_on
|
||||
ldrsh Rtmp, [N, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [N, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
|
||||
ldrb Rtmp, [N, #VERTEX_G]
|
||||
sub Rtmp, Rg, lsr #8
|
||||
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
|
||||
ldrb Rdg, [N, #VERTEX_G]
|
||||
subs Rdg, Rg, lsr #8
|
||||
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
|
||||
asr Rdg, #8 // 8-bit for fractional part
|
||||
|
||||
ldr Rdt, [N, #VERTEX_T]
|
||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
||||
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||
fiq_off
|
||||
.calc_right_end:
|
||||
@@ -182,12 +182,12 @@ rasterizeGTA_asm:
|
||||
|
||||
divLUT inv, width // inv = FixedInvU(width)
|
||||
|
||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
subs dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||
scaleUV dtdx, dtmp, dtmp2, inv
|
||||
// t == Lt (alias)
|
||||
|
||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
||||
subs dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||
mulne dgdx, inv // dgdx *= FixedInvU(width)
|
||||
asr dgdx, #16 // dgdx >>= 16
|
||||
// g == Lg (alias)
|
||||
|
||||
|
@@ -23,8 +23,6 @@ Ry2 .req Rh
|
||||
Lxy .req tmp
|
||||
Ly2 .req Lh
|
||||
indexB .req pair
|
||||
Ltmp .req N
|
||||
Rtmp .req N
|
||||
|
||||
.global rasterizeS_asm
|
||||
rasterizeS_asm:
|
||||
@@ -52,9 +50,9 @@ rasterizeS_asm:
|
||||
|
||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||
|
||||
ldrsh Ltmp, [L, #VERTEX_X]
|
||||
sub Ltmp, Lx, asr #16
|
||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
||||
ldrsh Ldx, [L, #VERTEX_X]
|
||||
subs Ldx, Lx, asr #16
|
||||
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||
.calc_left_end:
|
||||
|
||||
cmp Rh, #0
|
||||
@@ -76,9 +74,9 @@ rasterizeS_asm:
|
||||
|
||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||
|
||||
ldrsh Rtmp, [R, #VERTEX_X]
|
||||
sub Rtmp, Rx, asr #16
|
||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
||||
ldrsh Rdx, [R, #VERTEX_X]
|
||||
subs Rdx, Rx, asr #16
|
||||
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||
.calc_right_end:
|
||||
|
||||
cmp Rh, Lh // if (Rh < Lh)
|
||||
|
Reference in New Issue
Block a user