mirror of
https://github.com/XProger/OpenLara.git
synced 2025-08-06 21:26:56 +02:00
#368 GBA conditional mul in rasterizer
This commit is contained in:
@@ -121,11 +121,11 @@
|
|||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro scaleUV uv, tmp, tmp2, f
|
.macro scaleUV uv, tmp, tmp2, f
|
||||||
smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
|
smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
|
||||||
|
|
||||||
lsl \uv, #16
|
lsl \uv, #16
|
||||||
asr \uv, #16
|
asrs \uv, #16
|
||||||
mul \uv, \f // v = f * int16(uv)
|
mulne \uv, \f // v = f * int16(uv)
|
||||||
lsr \uv, #16
|
lsr \uv, #16
|
||||||
|
|
||||||
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
|
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
|
||||||
|
@@ -25,8 +25,6 @@ Lxy .req tmp
|
|||||||
Ly2 .req Lh
|
Ly2 .req Lh
|
||||||
LMAP .req Lx
|
LMAP .req Lx
|
||||||
ptr .req tmp
|
ptr .req tmp
|
||||||
Ltmp .req N
|
|
||||||
Rtmp .req N
|
|
||||||
|
|
||||||
.global rasterizeF_asm
|
.global rasterizeF_asm
|
||||||
rasterizeF_asm:
|
rasterizeF_asm:
|
||||||
@@ -57,9 +55,9 @@ rasterizeF_asm:
|
|||||||
|
|
||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
ldrsh Ltmp, [L, #VERTEX_X]
|
ldrsh Ldx, [L, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - L->v.x)
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
|
|
||||||
cmp Rh, #0
|
cmp Rh, #0
|
||||||
@@ -81,9 +79,9 @@ rasterizeF_asm:
|
|||||||
|
|
||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
ldrsh Rtmp, [R, #VERTEX_X]
|
ldrsh Rdx, [R, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
|
|
||||||
cmp Rh, Lh // if (Rh < Lh)
|
cmp Rh, Lh // if (Rh < Lh)
|
||||||
|
@@ -95,12 +95,12 @@ rasterizeFT_asm:
|
|||||||
|
|
||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
ldrsh Ltmp, [L, #VERTEX_X]
|
ldrsh Ldx, [L, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||||
|
|
||||||
ldr Ldt, [L, #VERTEX_T]
|
ldr Ldt, [L, #VERTEX_T]
|
||||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
|
|
||||||
@@ -125,12 +125,12 @@ rasterizeFT_asm:
|
|||||||
|
|
||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
ldrsh Rtmp, [R, #VERTEX_X]
|
ldrsh Rdx, [R, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
|
|
||||||
ldr Rdt, [R, #VERTEX_T]
|
ldr Rdt, [R, #VERTEX_T]
|
||||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ rasterizeFT_asm:
|
|||||||
|
|
||||||
divLUT inv, width // inv = FixedInvU(width)
|
divLUT inv, width // inv = FixedInvU(width)
|
||||||
|
|
||||||
sub dtdx, Rt, Lt // duv = Rt - Lt
|
subs dtdx, Rt, Lt // duv = Rt - Lt
|
||||||
scaleUV dtdx, dtmp, dtmp2, inv
|
scaleUV dtdx, dtmp, dtmp2, inv
|
||||||
|
|
||||||
mov t, Lt // t = Lt
|
mov t, Lt // t = Lt
|
||||||
|
@@ -96,12 +96,12 @@ rasterizeFTA_asm:
|
|||||||
|
|
||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
ldrsh Ltmp, [L, #VERTEX_X]
|
ldrsh Ldx, [L, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||||
|
|
||||||
ldr Ldt, [L, #VERTEX_T]
|
ldr Ldt, [L, #VERTEX_T]
|
||||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
|
|
||||||
@@ -126,12 +126,12 @@ rasterizeFTA_asm:
|
|||||||
|
|
||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
ldrsh Rtmp, [R, #VERTEX_X]
|
ldrsh Rdx, [R, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
|
|
||||||
ldr Rdt, [R, #VERTEX_T]
|
ldr Rdt, [R, #VERTEX_T]
|
||||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
|
|
||||||
@@ -154,7 +154,7 @@ rasterizeFTA_asm:
|
|||||||
|
|
||||||
divLUT inv, width // inv = FixedInvU(width)
|
divLUT inv, width // inv = FixedInvU(width)
|
||||||
|
|
||||||
sub dtdx, Rt, Lt // duv = Rt - Lt
|
subs dtdx, Rt, Lt // duv = Rt - Lt
|
||||||
scaleUV dtdx, dtmp, dtmp2, inv
|
scaleUV dtdx, dtmp, dtmp2, inv
|
||||||
|
|
||||||
mov t, Lt // t = Lt
|
mov t, Lt // t = Lt
|
||||||
|
@@ -107,17 +107,17 @@ rasterizeGT_asm:
|
|||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
fiq_on
|
fiq_on
|
||||||
ldrsh Ltmp, [N, #VERTEX_X]
|
ldrsh Ldx, [N, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||||
|
|
||||||
ldrb Ltmp, [N, #VERTEX_G]
|
ldrb Ldg, [N, #VERTEX_G]
|
||||||
sub Ltmp, Lg, lsr #(8 + G_EXTRA)
|
subs Ldg, Lg, lsr #(8 + G_EXTRA)
|
||||||
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
|
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
|
||||||
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
||||||
|
|
||||||
ldr Ldt, [N, #VERTEX_T]
|
ldr Ldt, [N, #VERTEX_T]
|
||||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||||
fiq_off
|
fiq_off
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
@@ -146,17 +146,17 @@ rasterizeGT_asm:
|
|||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
fiq_on
|
fiq_on
|
||||||
ldrsh Rtmp, [N, #VERTEX_X]
|
ldrsh Rdx, [N, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
|
|
||||||
ldrb Rtmp, [N, #VERTEX_G]
|
ldrb Rdg, [N, #VERTEX_G]
|
||||||
sub Rtmp, Rg, lsr #(8 + G_EXTRA)
|
subs Rdg, Rg, lsr #(8 + G_EXTRA)
|
||||||
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
|
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
|
||||||
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part
|
||||||
|
|
||||||
ldr Rdt, [N, #VERTEX_T]
|
ldr Rdt, [N, #VERTEX_T]
|
||||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||||
fiq_off
|
fiq_off
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
@@ -183,12 +183,12 @@ rasterizeGT_asm:
|
|||||||
|
|
||||||
divLUT inv, width // inv = FixedInvU(width)
|
divLUT inv, width // inv = FixedInvU(width)
|
||||||
|
|
||||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
subs dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||||
scaleUV dtdx, dtmp, dtmp2, inv
|
scaleUV dtdx, dtmp, dtmp2, inv
|
||||||
// t == Lt (alias)
|
// t == Lt (alias)
|
||||||
|
|
||||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
subs dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
mulne dgdx, inv // dgdx *= FixedInvU(width)
|
||||||
asr dgdx, #16 // dgdx >>= 16
|
asr dgdx, #16 // dgdx >>= 16
|
||||||
// g == Lg (alias)
|
// g == Lg (alias)
|
||||||
|
|
||||||
|
@@ -106,17 +106,17 @@ rasterizeGTA_asm:
|
|||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
fiq_on
|
fiq_on
|
||||||
ldrsh Ltmp, [N, #VERTEX_X]
|
ldrsh Ldx, [N, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||||
|
|
||||||
ldrb Ltmp, [N, #VERTEX_G]
|
ldrb Ldg, [N, #VERTEX_G]
|
||||||
sub Ltmp, Lg, lsr #8
|
subs Ldg, Lg, lsr #8
|
||||||
mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg)
|
mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg)
|
||||||
asr Ldg, #8 // 8-bit for fractional part
|
asr Ldg, #8 // 8-bit for fractional part
|
||||||
|
|
||||||
ldr Ldt, [N, #VERTEX_T]
|
ldr Ldt, [N, #VERTEX_T]
|
||||||
sub Ldt, Lt // Ldt = N->v.t - Lt
|
subs Ldt, Lt // Ldt = N->v.t - Lt
|
||||||
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
scaleUV Ldt, Ltmp, Ltmp2, tmp
|
||||||
fiq_off
|
fiq_off
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
@@ -145,17 +145,17 @@ rasterizeGTA_asm:
|
|||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
fiq_on
|
fiq_on
|
||||||
ldrsh Rtmp, [N, #VERTEX_X]
|
ldrsh Rdx, [N, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
|
|
||||||
ldrb Rtmp, [N, #VERTEX_G]
|
ldrb Rdg, [N, #VERTEX_G]
|
||||||
sub Rtmp, Rg, lsr #8
|
subs Rdg, Rg, lsr #8
|
||||||
mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg)
|
mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg)
|
||||||
asr Rdg, #8 // 8-bit for fractional part
|
asr Rdg, #8 // 8-bit for fractional part
|
||||||
|
|
||||||
ldr Rdt, [N, #VERTEX_T]
|
ldr Rdt, [N, #VERTEX_T]
|
||||||
sub Rdt, Rt // Rdt = N->v.t - Rt
|
subs Rdt, Rt // Rdt = N->v.t - Rt
|
||||||
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
scaleUV Rdt, Rtmp, Rtmp2, tmp
|
||||||
fiq_off
|
fiq_off
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
@@ -182,12 +182,12 @@ rasterizeGTA_asm:
|
|||||||
|
|
||||||
divLUT inv, width // inv = FixedInvU(width)
|
divLUT inv, width // inv = FixedInvU(width)
|
||||||
|
|
||||||
sub dtdx, Rt, Lt // dtdx = Rt - Lt
|
subs dtdx, Rt, Lt // dtdx = Rt - Lt
|
||||||
scaleUV dtdx, dtmp, dtmp2, inv
|
scaleUV dtdx, dtmp, dtmp2, inv
|
||||||
// t == Lt (alias)
|
// t == Lt (alias)
|
||||||
|
|
||||||
sub dgdx, Rg, Lg // dgdx = Rg - Lg
|
subs dgdx, Rg, Lg // dgdx = Rg - Lg
|
||||||
mul dgdx, inv // dgdx *= FixedInvU(width)
|
mulne dgdx, inv // dgdx *= FixedInvU(width)
|
||||||
asr dgdx, #16 // dgdx >>= 16
|
asr dgdx, #16 // dgdx >>= 16
|
||||||
// g == Lg (alias)
|
// g == Lg (alias)
|
||||||
|
|
||||||
|
@@ -23,8 +23,6 @@ Ry2 .req Rh
|
|||||||
Lxy .req tmp
|
Lxy .req tmp
|
||||||
Ly2 .req Lh
|
Ly2 .req Lh
|
||||||
indexB .req pair
|
indexB .req pair
|
||||||
Ltmp .req N
|
|
||||||
Rtmp .req N
|
|
||||||
|
|
||||||
.global rasterizeS_asm
|
.global rasterizeS_asm
|
||||||
rasterizeS_asm:
|
rasterizeS_asm:
|
||||||
@@ -52,9 +50,9 @@ rasterizeS_asm:
|
|||||||
|
|
||||||
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
divLUT tmp, Lh // tmp = FixedInvU(Lh)
|
||||||
|
|
||||||
ldrsh Ltmp, [L, #VERTEX_X]
|
ldrsh Ldx, [L, #VERTEX_X]
|
||||||
sub Ltmp, Lx, asr #16
|
subs Ldx, Lx, asr #16
|
||||||
mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx)
|
mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx)
|
||||||
.calc_left_end:
|
.calc_left_end:
|
||||||
|
|
||||||
cmp Rh, #0
|
cmp Rh, #0
|
||||||
@@ -76,9 +74,9 @@ rasterizeS_asm:
|
|||||||
|
|
||||||
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
divLUT tmp, Rh // tmp = FixedInvU(Rh)
|
||||||
|
|
||||||
ldrsh Rtmp, [R, #VERTEX_X]
|
ldrsh Rdx, [R, #VERTEX_X]
|
||||||
sub Rtmp, Rx, asr #16
|
subs Rdx, Rx, asr #16
|
||||||
mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx)
|
mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx)
|
||||||
.calc_right_end:
|
.calc_right_end:
|
||||||
|
|
||||||
cmp Rh, Lh // if (Rh < Lh)
|
cmp Rh, Lh // if (Rh < Lh)
|
||||||
|
Reference in New Issue
Block a user